H5File file(FILE_NAME, H5F_ACC_TRUNC);

// two dimensions, 2x2
hsize_t str_dimsf[2];
str_dimsf[0] = 2;
str_dimsf[1] = 2;
hsize_t maxdims = H5S_UNLIMITED;
H5::DataSpace dataspace(2, str_dimsf); // passing maxdims as a third argument crashes, see note (1)

const char *matrix[2][2];
matrix[0][0] = "Astronomy";
matrix[0][1] = "Biology";
matrix[1][0] = "Physics";
matrix[1][1] = "Chemistry";

// Variable-length string type
H5::StrType datatype(H5::PredType::C_S1, H5T_VARIABLE);
H5::DataSet str_dataset = file.createDataSet("MyDataset", datatype, dataspace);
str_dataset.write(matrix, datatype);

// Attempts to extend the dataset
hsize_t size[2];
size[0] = 10;
size[1] = 1;
// str_dataset.extend(size); // crashes, see note (2)
herr_t status = H5Dextend(str_dataset.getId(), size); // C API call does not crash, but the dataset is not extended

// Querying the space, as suggested in another post, to get the dataset updated: no effect
H5D_space_status_t space_status;
str_dataset.getSpaceStatus(space_status);
H5::DataSpace file_space = str_dataset.getSpace();

return;
}
I have tried this piece of code with a string dataset and can't extend the dataset no matter what I try. Am I missing anything? In one of the posts someone suggested querying the space to get the dataset updated; I tried that as well.
Note:
(1) H5::DataSpace dataspace(2, str_dimsf, maxdims); also crashes.
(2) str_dataset.extend(...) always crashes; I can't figure out why, but the equivalent C call with the dataset id does not crash.
Appreciate any comments, suggestions, pointers.
thanks & warm regards
~ Rudresh
Rudresh, how are you? The basic problem is that the dataset created by
file.createDataSet("MyDataset", datatype, dataspace);
is not extendible. It has contiguous storage layout, which fixes its logical extent at creation time. To be extendible, a dataset must be created with chunked layout, and its maximum extents must be unlimited or large enough for your needs.
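Applied to your snippet, the minimal change is a dataset creation property list with a chunk size, plus unlimited maximum dimensions on the dataspace. A sketch (untested; the chunk size is chosen arbitrarily):

hsize_t dims[2]    = {2, 2};
hsize_t maxdims[2] = {H5S_UNLIMITED, H5S_UNLIMITED};
H5::DataSpace dataspace(2, dims, maxdims); // maxdims must be an array of rank 2

H5::DSetCreatPropList dcpl;
hsize_t chunk[2] = {2, 2};
dcpl.setChunk(2, chunk); // chunked layout is what makes the dataset extendible

H5::StrType datatype(H5::PredType::C_S1, H5T_VARIABLE);
H5::DataSet dset = file.createDataSet("MyDataset", datatype, dataspace, dcpl);

hsize_t new_size[2] = {4, 2};
dset.extend(new_size); // grows the dataset to 4x2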
Since I’m lazy and curious, I asked ChatGPT to write me an example. My instruction was this: Using the C++ API, create an extendible chunked integer HDF5 dataset with a 2x2 chunk size and write a 4x4 array to it!
Here is what it produced:
#include <iostream>
#include <H5Cpp.h>

using namespace H5;

void write_4x4_array_to_hdf5() {
    // Define the file and dataset properties
    const H5std_string FILE_NAME("extendible_chunked_integer_dataset.h5");
    const H5std_string DATASET_NAME("IntegerDataset");
    const hsize_t CHUNK_SIZE = 2;

    // Create the file
    H5File file(FILE_NAME, H5F_ACC_TRUNC);

    // Define the dataset dimensions
    hsize_t initial_dims[2] = {4, 4};
    hsize_t max_dims[2] = {H5S_UNLIMITED, H5S_UNLIMITED};
    hsize_t chunk_dims[2] = {CHUNK_SIZE, CHUNK_SIZE};

    // Create the dataspace
    DataSpace dataspace(2, initial_dims, max_dims);

    // Create the dataset creation property list and set the chunk size
    DSetCreatPropList dcpl;
    dcpl.setChunk(2, chunk_dims);

    // Create the dataset
    DataSet dataset = file.createDataSet(DATASET_NAME, PredType::NATIVE_INT, dataspace, dcpl);

    // Define and fill the 4x4 array
    int data[4][4] = {
        {0, 1, 2, 3},
        {4, 5, 6, 7},
        {8, 9, 10, 11},
        {12, 13, 14, 15}
    };

    // Write the data to the dataset
    dataset.write(data, PredType::NATIVE_INT);

    // Close the dataset, dataspace, and file
    dataset.close();
    dataspace.close();
    file.close();
}

int main() {
    try {
        write_4x4_array_to_hdf5();
    } catch (FileIException& error) {
        error.printErrorStack();
        return -1;
    } catch (DataSetIException& error) {
        error.printErrorStack();
        return -1;
    } catch (DataSpaceIException& error) {
        error.printErrorStack();
        return -1;
    }
    std::cout << "Successfully created extendible chunked integer HDF5 dataset and wrote a 4x4 array to it!" << std::endl;
    return 0;
}
I did not try to compile and run this, but there's a good chance it will work. Can you give this a try and then modify it for your needs?
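(For reference, the h5c++ compiler wrapper that ships with HDF5 should build it, e.g. h5c++ example.cpp -o example, assuming a standard installation.)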
Best, G.
Hi @rudreshjoshi,
In case you are not restricted to a particular API, you may want to try HDFql, a high-level (declarative) language that shields you from low-level HDF5 details. Creating an extendible dataset in C++ using HDFql could be done as follows:
// declare a variable named 'matrix'
const char *matrix[2][2];
// fill-up 'matrix' with some values
matrix[0][0] = "Astronomy";
matrix[0][1] = "Biology";
matrix[1][0] = "Physics";
matrix[1][1] = "Chemistry";
// create an HDF5 file named 'test.h5' and use (i.e. open) it
HDFql::execute("CREATE AND USE FILE test.h5");
// register 'matrix' for subsequent use (by HDFql)
HDFql::variableTransientRegister(&matrix);
// create a dataset named 'dset' of two dimensions (the first being extendible) and write the values stored in 'matrix' in it
HDFql::execute("CREATE DATASET dset AS VARCHAR(2 TO UNLIMITED, 2) VALUES FROM MEMORY 0");
// alter (i.e. extend) first dimension of 'dset' with +3 (i.e. this dimension will have a size equal to 5)
HDFql::execute("ALTER DIMENSION dset TO +3");
Hope it helps!
Here’s ChatGPT’s take on that:
Using the HDFql C++ API, create an extendible chunked integer dataset with a 2x2 chunk size and write a 4x4 array to it!
#include <iostream>
#include <vector>
#include "HDFql.hpp"

int main() {
    // Step 1: Include the necessary header files
    // Already done at the beginning of the code

    // Step 2: Create an HDF5 file
    HDFql::execute("CREATE FILE my_file.h5");

    // Step 3: Create an extendible dataset using the create function with chunking and data type specified
    HDFql::execute("CREATE CHUNKED DATASET my_file.h5:/my_dataset AS INT(0:UNLIMITED, 0:UNLIMITED) CHUNK(2, 2)");

    // Step 4: Write a 4x4 array to the dataset
    std::vector<int> data = {1, 2, 3, 4,
                             5, 6, 7, 8,
                             9, 10, 11, 12,
                             13, 14, 15, 16};

    // Resize the dataset to 4x4
    HDFql::execute("ALTER DATASET my_file.h5:/my_dataset SET DIMENSION SIZES 4, 4");

    // Write the data to the dataset
    HDFql::variableRegister(data);
    HDFql::execute("INSERT INTO my_file.h5:/my_dataset VALUES FROM MEMORY " + std::to_string(HDFql::variableGetNumber()));

    // Step 5: Close the HDF5 file
    HDFql::execute("CLOSE FILE my_file.h5");

    return 0;
}
Correct?
G.
stream of scalars:
#include <h5cpp/all>

int main() {
    h5::fd_t fd = h5::create("example.h5", H5F_ACC_TRUNC);
    h5::pt_t pt = h5::create<size_t>(fd, "stream of scalars", h5::max_dims{H5S_UNLIMITED});
    for (auto value : {1, 2, 3, 4, 5})
        h5::append(pt, value);
}
stream of matrices/frames:
#include <armadillo>
#include <h5cpp/all>

int main() {
    h5::fd_t fd = h5::create("example.h5", H5F_ACC_TRUNC);
    size_t nrows = 2, ncols = 5, nframes = 3;
    h5::pt_t pt = h5::create<double>(fd, "stream of matrices",
        h5::max_dims{H5S_UNLIMITED, nrows, ncols}, h5::chunk{1, nrows, ncols});
    arma::mat M(nrows, ncols);
    // do actual IO
    for (int i = 0; i < nframes; i++)
        h5::append(pt, M);
}
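(For context: h5::pt_t is h5cpp's packet-table handle. Each h5::append buffers one element or frame and grows the dataset along the first, unlimited dimension, flushing data to disk as chunks fill up.)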
steven
Thanks a lot.
I am alright, how about you?
Yes, your code was helpful, and since I wanted to add string variables, I took some help from your code and comments, and some help from ChatGPT. It almost worked, except that the write was either not writing or crashing.
I had to add a piece of code, and then it worked. Thanks a lot for your help.
Here is the complete code, if someone wants to try it:
#include <iostream>
#include <H5Cpp.h>

const H5std_string FILE_NAME("chunked_dataset.h5");
const H5std_string DATASET_NAME("chunked_string_dataset");
const int ROWS = 10;
const int COLS = 5;

int main()
{
    try {
        // Create a new HDF5 file
        H5::H5File file(FILE_NAME, H5F_ACC_TRUNC);

        // Create the dataspace: 10x5, extendible along the first dimension
        hsize_t dims[2] = {ROWS, COLS};
        hsize_t max_dims[2] = {H5S_UNLIMITED, COLS};
        H5::DataSpace dataspace(2, dims, max_dims);

        // Set the dataset creation properties: chunked layout makes the dataset extendible
        H5::DSetCreatPropList cparms;
        cparms.setChunk(2, dims);

        // Variable-length string type
        H5::StrType datatype(H5::PredType::C_S1, H5T_VARIABLE);

        // Create the dataset
        H5::DataSet dataset = file.createDataSet(DATASET_NAME, datatype, dataspace, cparms);

        // One row of variable-length strings
        const char* matrix_ext[1][COLS];
        matrix_ext[0][0] = "This is str1";
        matrix_ext[0][1] = "This is str2";
        matrix_ext[0][2] = "This is str3";
        matrix_ext[0][3] = "This is str4";
        matrix_ext[0][4] = "This is str5";

        // Memory dataspace matching one row
        hsize_t dims3[2] = {1, COLS};
        H5::DataSpace mspace2(2, dims3, max_dims);

        // Select the first row in the dataset and write to it
        hsize_t start[2] = {0, 0};
        hsize_t count[2] = {1, COLS};
        dataspace.selectHyperslab(H5S_SELECT_SET, count, start);
        dataset.write(matrix_ext, datatype, mspace2, dataspace);

        // Select the second row and write to it
        hsize_t start2[2] = {1, 0};
        hsize_t count2[2] = {1, COLS};
        dataspace.selectHyperslab(H5S_SELECT_SET, count2, start2);
        dataset.write(matrix_ext, datatype, mspace2, dataspace);

        // Extend the dataset to its final size (10x5)
        hsize_t final_dims[2] = {ROWS, COLS};
        dataset.extend(final_dims);

        // Close the dataset and file
        dataset.close();
        file.close();

        std::cout << "Dataset created successfully!" << std::endl;
    } catch (H5::Exception& error) {
        error.printErrorStack();
        return -1;
    }
    return 0;
}
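One follow-up note, in case someone extends past the original extent: the dataspace handle obtained at creation time keeps describing the old extent, so after extend() the file space should be re-fetched with getSpace() before selecting the new rows. A sketch reusing the names from the code above (untested):

// grow the dataset by one row beyond its current extent
hsize_t grown[2] = {ROWS + 1, COLS};
dataset.extend(grown);

// the old 'dataspace' handle still describes the previous extent;
// re-fetch the file space before selecting the newly added row
H5::DataSpace new_space = dataset.getSpace();
hsize_t start3[2] = {ROWS, 0};
hsize_t count3[2] = {1, COLS};
new_space.selectHyperslab(H5S_SELECT_SET, count3, start3);
dataset.write(matrix_ext, datatype, mspace2, new_space);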
Thanks a lot. We might not be using HDFql, but I will keep it in mind; if there is a need, I will use it.
Thanks a lot, Steven. I wanted to have a dataset of strings (rows x columns), but I will keep this in mind in case a requirement comes up.
Just for completeness, here's ChatGPT's response to "Using the H5CPP API, create an extendible chunked integer dataset with a 2x2 chunk size and write a 4x4 array to it!":
#include <iostream>
#include <h5cpp/all>
#include <Eigen/Dense>

int main() {
    // Create a 4x4 array
    Eigen::Matrix<int, 4, 4> data;
    data << 1, 2, 3, 4,
            5, 6, 7, 8,
            9, 10, 11, 12,
            13, 14, 15, 16;

    // Create a file and a dataset
    h5::fd_t file = h5::create("data.h5", H5F_ACC_TRUNC);
    h5::ds_t<int> dataset = h5::create<int>(file, "dataset",
        h5::max_dims{H5S_UNLIMITED, H5S_UNLIMITED},
        h5::chunk{2, 2});

    // Write the data to the dataset
    dataset = data;

    // Close the file and dataset
    dataset.close();
    file.close();

    std::cout << "Data written to data.h5 successfully!" << std::endl;
    return 0;
}
The close bits are superfluous because of RAII. The assignment dataset = data; looks cool(*) but is not (?) supported in the current implementation. @steven.varga is that correct?
G.
(*) You made it look too much like h5py
Hi @gheber,
Thanks for sharing this!
ChatGPT got most of it right - at least it certainly pointed in the right direction on how to create an extendible chunked integer dataset with a 2x2 chunk size and write a 4x4 array to it using HDFql in C++. For correctness, here is one solution for this use case:
#include <iostream>
#include <vector>
#include "HDFql.hpp"

int main() {
    // Step 1: Include the necessary header files
    // Already done at the beginning of the code

    // Step 2: Create an HDF5 file
    HDFql::execute("CREATE FILE my_file.h5");

    // Step 3: Create an extendible dataset
    HDFql::execute("CREATE CHUNKED(2, 2) DATASET my_file.h5 my_dataset AS INT(0 TO UNLIMITED, 0 TO UNLIMITED)");

    // Step 4: Write a 4x4 array to the dataset
    std::vector<int> data = {1, 2, 3, 4,
                             5, 6, 7, 8,
                             9, 10, 11, 12,
                             13, 14, 15, 16};

    // Resize the dataset to 4x4
    HDFql::execute("ALTER DIMENSION my_file.h5 my_dataset TO 4, 4");

    // Write the data to the dataset
    HDFql::execute("INSERT INTO my_file.h5 my_dataset VALUES FROM MEMORY " + std::to_string(HDFql::variableTransientRegister(data)));

    return 0;
}
Hope it helps!