Write extendible datasets in different files in parallel with HDF5


#1

I have a user-defined class representing a scalar field (NXN matrix). This field (instance) changes as time progresses. I want to be able to write the data from many time steps in an extendible data set with HDF5.

Furthermore, I want to be able to run multiple simulations (each one of them has different input and output files.)

My goal is to execute different runs with different threads, and each thread to write the data in an extendible dataset.

I have written the following code:

#include <mpi.h>
#include <H5Cpp.h>
#include 'Mat2D.h'

using namespace H5;

int main(int argc, char* argv[])
{
  int rank;
  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);

  int cols = 5;
  int rows = 5;
  int nghost = 1;
  Mat2D field(cols,rows,nghost); //dimensions: (cols+2*nghost)X(rows+2*nghost)
  for (int i=0;i<4;i++)
    field.fill( (double)rank );
  
  std::string FILENAME = "field" + std::to_string(rank) + ".h5";
  std::string DATASETNAME = "ExtendibleArray" + std::to_string(rank);

  hsize_t ROWS = rows + 2*nghost; //write the ghost zones as well
  hsize_t COLS = cols + 2*nghost;
  hsize_t DIMS[2] = {ROWS,COLS}; // data set dimensions at creation
  hsize_t MAXDIMS[2] = {H5S_UNLIMITED, H5S_UNLIMITED};
  hsize_t CHUNK_DIMS[2] = {ROWS,COLS};
  
  /* Variables for extending and writing to the extended 
     portion of the dataset */
  hsize_t SIZE[2];
  SIZE[0] = ROWS;
  SIZE[1] = COLS;
  hsize_t OFFSET[2];
  OFFSET[0] = 0;
  OFFSET[1] = 0;
  hsize_t DIMSEXT[2] = {ROWS,COLS}; //extend dimensions

  // Create a new file using the default property lists.
  H5File FILE(FILENAME, H5F_ACC_TRUNC);

  // Create the data space for the dataset. Note the use of pointer
  // for the instance 'dataspace'. It can be deleted and used again
  // later for another dataspace. An HDF5 identifier can be closed
  // by the destructor or the method 'close()'.
  DataSpace* DATASPACE = new DataSpace(2, DIMS, MAXDIMS);
  
  // Modify dataset creation property to enable chunking
  DSetCreatPropList PROP;
  PROP.setChunk(2, CHUNK_DIMS);

  // Create the chunked dataset. Note the use of the pointer.
  DataSet* DATASET =
    new DataSet(FILE.createDataSet(DATASETNAME,
                   PredType::NATIVE_DOUBLE,*DATASPACE,PROP));

  // Start iterations.
  DataSpace *FILESPACE;
  DataSpace *MEMSPACE;
  int maxiter = 5;
  for (int iter = 0; iter<maxiter; iter++)
    {
      // Extend the dataset.
      if (iter < maxiter-1) // this is to avoid writing a chunk full of zeroes at the end
    {
      SIZE[0] += DIMSEXT[0];
      SIZE[1] = DIMS[1];
      DATASET->extend(SIZE);
    }
      
      // Select a hyperslab in extended portion of the dataset.
      FILESPACE = new DataSpace(DATASET->getSpace());
      OFFSET[0] = iter*ROWS;
      FILESPACE->selectHyperslab(H5S_SELECT_SET, DIMSEXT, OFFSET);
      
      // Define memory space.
      MEMSPACE = new DataSpace(2, DIMSEXT, NULL);
      
      // Write data to the extended portion of the dataset.
      DATASET->write(field.getmemory(),
             PredType::NATIVE_DOUBLE, *MEMSPACE, *FILESPACE);

      field+(double)(iter+1); //modify the field
      delete FILESPACE;
      delete MEMSPACE;
    }
  
  // Close all objects and file.
  PROP.close();
  delete DATASPACE;
  delete DATASET;
  FILE.close();
  
  MPI_Finalize();
  return 0;
}

Now if I execute mpirun -n 2 ./myexe, it works and if I open the first .h5 file with MATLAB I get:

0   0   0   0   0   0   0
0   0   0   0   0   0   0
0   0   0   0   0   0   0
0   0   0   0   0   0   0
0   0   0   0   0   0   0
0   0   0   0   0   0   0
0   0   0   0   0   0   0
1   1   1   1   1   1   1
1   1   1   1   1   1   1
1   1   1   1   1   1   1
1   1   1   1   1   1   1
1   1   1   1   1   1   1
1   1   1   1   1   1   1
1   1   1   1   1   1   1
3   3   3   3   3   3   3
3   3   3   3   3   3   3
3   3   3   3   3   3   3
3   3   3   3   3   3   3
3   3   3   3   3   3   3
3   3   3   3   3   3   3
3   3   3   3   3   3   3
6   6   6   6   6   6   6
6   6   6   6   6   6   6
6   6   6   6   6   6   6
6   6   6   6   6   6   6
6   6   6   6   6   6   6
6   6   6   6   6   6   6
6   6   6   6   6   6   6
10  10  10  10  10  10  10
10  10  10  10  10  10  10
10  10  10  10  10  10  10
10  10  10  10  10  10  10
10  10  10  10  10  10  10
10  10  10  10  10  10  10
10  10  10  10  10  10  10

Now I want to have the block of lines that write to the extendible dataset inside a function so that my main function looks like:

int main(int argc, char* argv[])
{
  int rank;
  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);

  int cols = 5;
  int rows = 5;
  int nghost = 1;
  Mat2D field(cols,rows,nghost);//dimensions: (cols+2*nghost)X(rows+2*nghost)
  for (int i=0;i<4;i++)
    field.fill((double)rank);
  
  std::string FILENAME = "hfield" + std::to_string(rank) + ".h5";
  std::string DATASETNAME = "ExtendibleArray" + std::to_string(rank);

  int calls = 0;
  int maxiter = 5;
  for (int iter = 0; iter<maxiter; iter++)
    {
      Pwrite(field,FILENAME,DATASETNAME,calls);
      calls++;
      field+(double)(iter+1);
    }
  
  MPI_Finalize();
  return 0;
}

The Pwrite function is:

void Pwrite(Mat2D& field,std::string FILENAME,std::string DATASETNAME,
        int& calls)
{
  
  hsize_t ROWS = field.getrows() + 2*field.getnghost(); //w the ghost zones too 
  hsize_t COLS = field.getcols() + 2*field.getnghost();
  hsize_t DIMS[2] = {ROWS,COLS}; // data set dimensions at creation
  hsize_t MAXDIMS[2] = {H5S_UNLIMITED, H5S_UNLIMITED};
  hsize_t CHUNK_DIMS[2] = {ROWS,COLS};
  
  /* Variables for extending and writing to the extended 
     portion of the dataset */
  hsize_t SIZE[2];
  SIZE[0] = ROWS;
  SIZE[1] = COLS;
  hsize_t OFFSET[2];
  OFFSET[0] = 0;
  OFFSET[1] = 0;
  hsize_t DIMSEXT[2] = {ROWS,COLS}; //extend dimensions

  // Create a new file using the default property lists.
  H5File FILE(FILENAME, H5F_ACC_TRUNC);

  // Create the data space for the dataset. Note the use of pointer
  // for the instance 'dataspace'. It can be deleted and used again
  // later for another dataspace. An HDF5 identifier can be closed
  // by the destructor or the method 'close()'.
  DataSpace* DATASPACE = new DataSpace(2, DIMS, MAXDIMS);
  
  // Modify dataset creation property to enable chunking
  DSetCreatPropList PROP;
  PROP.setChunk(2, CHUNK_DIMS);

  // Create the chunked dataset. Note the use of pointer.
  DataSet* DATASET =
    new DataSet(FILE.createDataSet(DATASETNAME,
                   PredType::NATIVE_DOUBLE,*DATASPACE,PROP));

  DataSpace *FILESPACE;
  DataSpace *MEMSPACE;

  SIZE[0] += DIMSEXT[0];
  SIZE[1] = DIMS[1];
  DATASET->extend(SIZE);

  // Select a hyperslab in extended portion of the dataset.
  FILESPACE = new DataSpace(DATASET->getSpace());
  OFFSET[0] = calls*ROWS;
  FILESPACE->selectHyperslab(H5S_SELECT_SET, DIMSEXT, OFFSET);
  
  // Define memory space.
  MEMSPACE = new DataSpace(2, DIMSEXT, NULL);
  
  // Write data to the extended portion of the dataset.
  DATASET->write(field.getmemory(),
         PredType::NATIVE_DOUBLE, *MEMSPACE, *FILESPACE);
  
  delete FILESPACE;
  delete MEMSPACE;
    
  // Close all objects and file.
  PROP.close();
  delete DATASPACE;
  delete DATASET;
  FILE.close();
}

When I execute mpirun -n 2 ./myexe I am getting:

HDF5-DIAG: Error detected in HDF5 (1.12.0) thread 0:
  #000: H5Dio.c line 314 in H5Dwrite(): can't write data
    major: Dataset
    minor: Write failed
  #001: H5VLcallback.c line 2186 in H5VL_dataset_write(): dataset write failed
    major: Virtual Object Layer
    minor: Write failed
  #002: H5VLcallback.c line 2152 in H5VL__dataset_write(): dataset write failed
    major: Virtual Object Layer
    minor: Write failed
  #003: H5VLnative_dataset.c line 203 in H5VL__native_dataset_write(): could not get a validated dataspace from file_space_id
    major: Invalid arguments to routine
    minorHDF5-DIAG: Error detected in HDF5 (1.12.0) thread 0:
  #000: H5Dio.c line 314 in H5Dwrite(): can't write data
    major: Dataset
    minor: Write failed
  #001: H5VLcallback.c line 2186 in H5VL_dataset_write(): dataset write failed
    major: Virtual Object Layer
    minor: Write failed
  #002: H5VLcallback.c line 2152 in H5VL__dataset_write(): dataset write failed
    major: Virtual Object Layer
    minor: Write failed
  #003: H5VLnative_dataset.c line 203 in H5VL__native_dataset_write(): could not get a validated dataspace from file_space_id
    major: Invalid arguments to routine
    minor: Bad value
  #004: H5S.c line 279 in H5S_get_validated_dataspace(): : Bad value
  #004: H5S.c line 279 in H5S_get_validated_dataspace(): selection + offset not within extent
    major: selection + offset not within extent
    major: Dataspace
    minoDataspace
    minor: Out of range
r: Out of range
terminate called after throwing an instance of 'H5::DataSetIException'
terminate called after throwing an instance of 'H5::DataSetIException'

===================================================================================
=   BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
=   PID 13121 RUNNING AT angelos-GF63-Thin-10SCXR
=   EXIT CODE: 134
=   CLEANING UP REMAINING PROCESSES
=   YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Aborted (signal 6)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

Any suggestions on how to fix it? Thank you.