Unable to write the same vector several times?

hello all,
I am trying this code on Ubuntu 22 LTS:

#include "H5Cpp.h"  // Include the HDF5 C++ API header
#include <iostream>
#include <vector>

using namespace H5;

// Demo program from the question: creates example.h5 containing one 3-D
// dataset of doubles and calls DataSet::write on it eight times, always from
// the same in-memory buffer.
// Returns 0 on success, -1 if one of the caught HDF5 exceptions is thrown.
int main() {
    // File name and dataset name
    const std::string FILE_NAME = "example.h5";
    const std::string DATASET_NAME = "3D_Double_Array";

    // Dimensions of the 3D array
    const hsize_t DIM_X = 4; // First dimension
    const hsize_t DIM_Y = 3; // Second dimension
    const hsize_t DIM_Z = 2; // Third dimension
    // Extent of the dataset in the file: every dimension is doubled,
    // i.e. 8 x 6 x 4 = 192 elements.
    const hsize_t dims[3] = {DIM_X*2, DIM_Y*2, DIM_Z*2};

    // Create a 3D array and fill it with values.
    // The buffer holds DIM_X * DIM_Y * DIM_Z = 24 elements (not the 192 of
    // dims), laid out row-major: element (i, j, k) is i*100 + j*10 + k.
    std::vector<double> data(DIM_X * DIM_Y * DIM_Z);
    for (hsize_t i = 0; i < DIM_X; ++i) {
        for (hsize_t j = 0; j < DIM_Y; ++j) {
            for (hsize_t k = 0; k < DIM_Z; ++k) {
                data[i * DIM_Y * DIM_Z + j * DIM_Z + k] = static_cast<double>(i * 100 + j * 10 + k);
            }
        }
    }

    try {
        // Create a new HDF5 file
        H5File file(FILE_NAME, H5F_ACC_TRUNC);

        // Create the dataspace for the dataset
        DataSpace dataspace(3, dims);

        // Create the dataset with datatype double
        DataSet dataset = file.createDataSet(DATASET_NAME, PredType::NATIVE_DOUBLE, dataspace);

        // Write the data to the dataset
        // NOTE(review): no memory or file dataspace is passed to write(), so
        // each call presumably uses HDF5's default "all" selections and
        // transfers the whole 8 x 6 x 4 extent -- 192 doubles read from a
        // 24-element buffer, i.e. past the end of `data`. Nothing here advances
        // a position between iterations, so the eight calls do not append.
        // Confirm against the DataSet::write documentation.
        for (int i = 0; i< 8; i++) {
            // Debug print of two buffer elements before each write.
            std::cout << "i " << i << ": " << data.data()[2] << " " << data.data()[3] << std::endl;
            dataset.write(data.data(), PredType::NATIVE_DOUBLE);
        }

        std::cout << "3D array successfully written to " << FILE_NAME << std::endl;

    } catch (FileIException &e) {
        // Each handler prints the HDF5 error stack and exits with -1.
        e.printErrorStack();
        return -1;
    } catch (DataSetIException &e) {
        e.printErrorStack();
        return -1;
    } catch (DataSpaceIException &e) {
        e.printErrorStack();
        return -1;
    }

    return 0;
}


but the resulting h5 file does not contain correct values after the first call to data.data():

$ h5dump example.h5
HDF5 "example.h5" {
GROUP "/" {
   DATASET "3D_Double_Array" {
      DATATYPE  H5T_IEEE_F64LE
      DATASPACE  SIMPLE { ( 8, 6, 4 ) / ( 8, 6, 4 ) }
      DATA {
      (0,0,0): 0, 1, 10, 11,
      (0,1,0): 20, 21, 100, 101,
      (0,2,0): 110, 111, 120, 121,
      (0,3,0): 200, 201, 210, 211,
      (0,4,0): 220, 221, 300, 301,
      (0,5,0): 310, 311, 320, 321,
      (1,0,0): 0, 2.42092e-322, 4.91634e-310, 4.94066e-324,
      (1,1,0): 4.94066e-324, 4.94066e-324, 4.91634e-310, 1.63042e-322,
      (1,2,0): 4.91634e-310, 4.91634e-310, 0, 5.85468e-321,
      (1,3,0): 3.78577e-270, 4.91634e-310, 9.88131e-324, 9.38725e-323,
      (1,4,0): 1.82918e-319, nan, 0, 4.94066e-324,
      (1,5,0): 4.94066e-324, 0, 1.4822e-323, 2.02369e-320,
      (2,0,0): 4.74303e-322, 4.74303e-322, 3.42966e+161, 1.1908e-307,
      (2,1,0): 0, 0, 0, 0,
      (2,2,0): 0, 0, 0, 0,
      (2,3,0): 0, 0, 0, 0,
      (2,4,0): 0, 0, 0, 0,
      (2,5,0): 0, 0, 0, 0,
      (3,0,0): 0, 0, 0, 0,
      (3,1,0): 0, 0, 0, 0,
      (3,2,0): 0, 0, 0, 0,
      (3,3,0): 0, 0, 0, 0,
      (3,4,0): 0, 0, 0, 0,
      (3,5,0): 0, 0, 0, 0,
      (4,0,0): 0, 0, 0, 0,
      (4,1,0): 0, 0, 0, 0,
      (4,2,0): 0, 0, 0, 0,
      (4,3,0): 0, 0, 0, 0,
      (4,4,0): 0, 0, 0, 0,
      (4,5,0): 0, 0, 0, 0,
      (5,0,0): 0, 0, 0, 0,
      (5,1,0): 0, 0, 0, 0,
      (5,2,0): 0, 0, 0, 0,
      (5,3,0): 0, 0, 0, 0,
      (5,4,0): 0, 0, 0, 0,
      (5,5,0): 0, 0, 0, 0,
      (6,0,0): 0, 0, 0, 0,
      (6,1,0): 0, 0, 0, 0,
      (6,2,0): 0, 0, 0, 0,
      (6,3,0): 0, 0, 0, 0,
      (6,4,0): 0, 0, 0, 0,
      (6,5,0): 0, 0, 0, 0,
      (7,0,0): 0, 0, 0, 0,
      (7,1,0): 0, 0, 0, 0,
      (7,2,0): 0, 0, 0, 1.13833e-320,
      (7,3,0): 4.92301e-316, 0, 0, 1.63042e-322,
      (7,4,0): 4.91634e-310, 4.91634e-310, 0, 1.63042e-322,
      (7,5,0): 4.91634e-310, 4.91634e-310, 0, 1.13882e-320
      }
   }
}
}

however, the content of the data vector seems correct, since I display it before each write to the file.

Where am I wrong?

Thanks in advance,

Gérard

Hi, @gerard.henry!

I asked Gemini and got the following answer. I hope it can help.


The code has a few potential issues:

  1. Data size mismatch: The dataset is created with dimensions 8 × 6 × 4 (192 elements), but the data array (data) only holds DIM_X * DIM_Y * DIM_Z (24 elements). Each dataset.write call in the 8-iteration loop therefore transfers the whole dataset and accesses memory outside the allocated buffer, which might cause a crash or data corruption.
  2. Incorrect access in write loop: You’re printing data.data()[2] and data.data()[3] inside the loop, but these accesses are not relevant to writing the entire data array.
  3. Unnecessary loop: The write function can write the entire data array in one call.

Here’s how to fix the code:

#include "H5Cpp.h"  // Include the HDF5 C++ API header
#include <iostream>
#include <vector>

using namespace H5;

int main() {
    // File name and dataset name
    const std::string FILE_NAME = "example.h5";
    const std::string DATASET_NAME = "3D_Double_Array";

    // Dimensions of the 3D array
    const hsize_t DIM_X = 4; 
    const hsize_t DIM_Y = 3; 
    const hsize_t DIM_Z = 2; 
    const hsize_t dims[3] = {DIM_X, DIM_Y, DIM_Z};

    // Create a 3D array and fill it with values
    std::vector<double> data(DIM_X * DIM_Y * DIM_Z);
    for (hsize_t i = 0; i < DIM_X; ++i) {
        for (hsize_t j = 0; j < DIM_Y; ++j) {
            for (hsize_t k = 0; k < DIM_Z; ++k) {
                data[i * DIM_Y * DIM_Z + j * DIM_Z + k] = static_cast<double>(i * 100 + j * 10 + k);
            }
        }
    }

    try {
        // Create a new HDF5 file
        H5File file(FILE_NAME, H5F_ACC_TRUNC);

        // Create the dataspace for the dataset
        DataSpace dataspace(3, dims);

        // Create the dataset with datatype double
        DataSet dataset = file.createDataSet(DATASET_NAME, PredType::NATIVE_DOUBLE, dataspace);

        // Write the data to the dataset in one call
        dataset.write(data.data(), PredType::NATIVE_DOUBLE);

        std::cout << "3D array successfully written to " << FILE_NAME << std::endl;

    } catch (FileIException &e) {
        e.printErrorStack();
        return -1;
    } catch (DataSetIException &e) {
        e.printErrorStack();
        return -1;
    } catch (DataSpaceIException &e) {
        e.printErrorStack();
        return -1;
    }

    return 0;
}

This code creates the dataset with the correct dimensions, writes the entire data vector to the HDF5 file in one call using dataset.write, and removes the unnecessary loop.

1 Like

hello,
Thanks for your fast reply, but I already know your solution (I also used an LLM for that :wink:); this is not what I want to do.
My problem is that I have a loop that generates over 100 GB of data, and I don’t want to store it all in a vector; I’d like to write at regular intervals inside the loop. That’s why I’ve just built a little example (with the help of ChatGPT) that writes the vector inside the loop, and not after it, as in your example.

Hi, @gerard.henry !

I think HDF5: Extendible Datasets can help.

Alternatively, you can also write a separate HDF5 file at regular interval.
Then, you can later merge them into a single file if necessary using a tool like ncks:
NCO 5.2.9 User Guide.

NASA Earth Observing Data management system is a great example of handling petabytes of data that are being generated by satellites looping continuously over 25 years:
Earthdata Search | Earthdata Search

In the above link, you can see a million Granules (= files) per data product.

Yes, I know, but I was hoping to do without it because I have to insert my writing into a rather special processing loop.
There’s this nice example I’ve picked up:

#include <H5Cpp.h>
#include <vector>
#include <iostream>

using namespace H5;

const H5std_string FILE_NAME("example.h5");
const H5std_string DATASET_NAME("3D_Dataset");

int main() {
    try {
        // Define dimensions of the entire dataset
        const hsize_t x_dim = 100; // e.g., X dimension
        const hsize_t y_dim = 100; // e.g., Y dimension
        const hsize_t z_dim = 100; // e.g., Z dimension

        // Define chunk size
        const hsize_t chunk_x = 10;
        const hsize_t chunk_y = 10;
        const hsize_t chunk_z = 10;

        // Create a new file using the default properties.
        H5File file(FILE_NAME, H5F_ACC_TRUNC);

        // Define the total dataset size and chunks
        hsize_t dims[3] = {x_dim, y_dim, z_dim};
        hsize_t chunk_dims[3] = {chunk_x, chunk_y, chunk_z};

        // Create dataspace for the dataset
        DataSpace dataspace(3, dims);

        // Create property list for chunking and compression
        DSetCreatPropList plist;
        plist.setChunk(3, chunk_dims);
        plist.setDeflate(6); // Compression level (0-9)

        // Create the dataset
        DataSet dataset = file.createDataSet(DATASET_NAME, PredType::IEEE_F64LE, dataspace, plist);

        // Buffer to hold chunk data
        std::vector<double> chunk_data(chunk_x * chunk_y * chunk_z);

        // Write data chunk by chunk
        for (hsize_t x = 0; x < x_dim; x += chunk_x) {
            for (hsize_t y = 0; y < y_dim; y += chunk_y) {
                for (hsize_t z = 0; z < z_dim; z += chunk_z) {
                    int ii = 0;
                    // Fill the chunk buffer with some data
                    for (size_t i = 0; i < chunk_data.size(); ++i) {
                        chunk_data[i] = static_cast<double>(ii++); // Example data
                    }

                    // Define the hyperslab in the dataset
                    hsize_t offset[3] = {x, y, z};
                    hsize_t count[3] = {chunk_x, chunk_y, chunk_z};
                    dataspace.selectHyperslab(H5S_SELECT_SET, count, offset);

                    // Define memory space for the chunk
                    DataSpace memspace(3, count);

                    // Write the chunk to the dataset
                    dataset.write(chunk_data.data(), PredType::NATIVE_DOUBLE, memspace, dataspace);
                }
            }
        }
    } catch (FileIException &e) {
        e.printErrorStack();
        return -1;
    } catch (DataSetIException &e) {
        e.printErrorStack();
        return -1;
    } catch (DataSpaceIException &e) {
        e.printErrorStack();
        return -1;
    } catch (DataTypeIException &e) {
        e.printErrorStack();
        return -1;
    }

    return 0;
}

which works well, but in my case I can’t touch the loops, because they are already part of a special processing loop; I’m forced to keep

        for (hsize_t x = 0; x < x_dim; x += 3) {
            for (hsize_t y = 0; y < y_dim; y += 3) {

and so it starts to become a complex process, whereas in my first example a single line was enough.

Thanks for your help; I’ll take a look at the links.