Hi,
I am using parallel HDF5 with OpenMPI 4.0 within IOR (I/O benchmark).
We traced the HDF5 operations of IOR with our own measurement tool and, after the run, ended up with an inconsistent HDF5 file. The measurement tool just calls `H5Iget_file_id` while executing `H5D_write`.
I reduced the issue to a small example (see below). There, `H5get_libversion` is called before `MPI_Init`, which causes the following error output as well as an inconsistent HDF5 file:
*** The MPI_Barrier() function was called after MPI_FINALIZE was invoked.
*** This is disallowed by the MPI standard.
*** Your MPI job will now abort.
[bimo:30167] Local abort after MPI_FINALIZE started completed successfully, but am not able to aggregate error messages, and not able to guarantee that all other processes were killed!
--------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun detected that one or more processes exited with non-zero status, thus causing
the job to be terminated. The first process to do so was:
Process name: [[45516,1],0]
Exit code: 1
--------------------------------------------------------------------------
When I move `H5get_libversion` after `MPI_Init`, or delete the call to `H5Iget_file_id`, the error disappears.
Used software versions
1. Test System - Laptop
OS: Archlinux
Compiler: GCC 8.2.1
HDF5: 1.8.21 with MPI support
MPI: OpenMPI 4.0.0
2. Test System - HPC Cluster
OS: Red Hat Enterprise Linux Server release 7.4
Compiler: GCC 6.4.0
HDF5: 1.10.1 with MPI support
MPI: OpenMPI 2.1.2
Code example:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define H5_USE_16_API
#include <hdf5.h>
#include <mpi.h>
#define NUM_DIMS 1 /* number of dimensions to data set */
typedef long long int IOR_size_t;
/*
 * Obtain a dataspace identifier for data_set via H5Dget_space(), select one
 * hyperslab on it, and hand the identifier back through *file_data_space.
 *
 * The selection is a single block (count = 1) described by start, stride and
 * block along the only dimension (NUM_DIMS).
 *
 * Every call stores a newly obtained identifier in *file_data_space; any value
 * previously held there is overwritten without being closed. Return codes of
 * the HDF5 calls are not inspected.
 */
void seek_offset(hid_t data_set,
                 hid_t* file_data_space,
                 hsize_t start,
                 hsize_t stride,
                 hsize_t block)
{
    const hsize_t sel_start[NUM_DIMS]  = { start };
    const hsize_t sel_stride[NUM_DIMS] = { stride };
    const hsize_t sel_count[NUM_DIMS]  = { (hsize_t) 1 };
    const hsize_t sel_block[NUM_DIMS]  = { block };

    hid_t space = H5Dget_space(data_set);

    H5Sselect_hyperslab(space, H5S_SELECT_SET,
                        sel_start, sel_stride, sel_count, sel_block);

    /* Publish the identifier to the caller. */
    *file_data_space = space;
}
int main(int argc, char **argv) {
hid_t xferPropList; /* xfer property list */
hid_t dataSet; /* data set id */
hid_t dataSpace; /* data space id */
hid_t fileDataSpace; /* file data space id */
hid_t memDataSpace; /* memory data space id */
IOR_size_t blockSize = 1048576;
IOR_size_t transferSize = 262144;
size_t alignment = 1;
const char *dataSetName = "foo-bar";
hsize_t memStart[NUM_DIMS], dataSetDims[NUM_DIMS], memStride[NUM_DIMS],
memCount[NUM_DIMS], memBlock[NUM_DIMS], memDataSpaceDims[NUM_DIMS];
unsigned m1,m2,m3;
// Moving H5get_libversion after MPI_Init will fix the problem.
H5get_libversion(&m1, &m2, &m3);
MPI_Init(&argc, &argv);
hid_t createPropList = H5Pcreate(H5P_FILE_CREATE);
H5Pset_sizes(createPropList, sizeof(hsize_t), sizeof(hsize_t));
hid_t accessPropList = H5Pcreate(H5P_FILE_ACCESS);
H5Pset_fapl_mpio(accessPropList, MPI_COMM_WORLD, MPI_INFO_NULL),
H5Pset_alignment(accessPropList, alignment, alignment);
hid_t file = H5Fcreate("foo.h5", H5F_ACC_TRUNC, createPropList, accessPropList);
H5Pclose(createPropList);
H5Pclose(accessPropList);
xferPropList = H5Pcreate(H5P_DATASET_XFER);
H5Pset_dxpl_mpio(xferPropList, H5FD_MPIO_INDEPENDENT);
memStart[0] = (hsize_t)0;
memCount[0] = (hsize_t)1;
memStride[0] = (hsize_t)(transferSize / sizeof(IOR_size_t));
memBlock[0] = (hsize_t)(transferSize / sizeof(IOR_size_t));
memDataSpaceDims[0] = (hsize_t)transferSize;
memDataSpace = H5Screate_simple(NUM_DIMS, memDataSpaceDims, NULL);
H5Sselect_hyperslab(memDataSpace, H5S_SELECT_SET, memStart, memStride,
memCount, memBlock);
dataSetDims[0] = (hsize_t)(blockSize / sizeof(size_t));
dataSpace = H5Screate_simple(NUM_DIMS, dataSetDims, NULL);
hid_t dataSetPropList = H5Pcreate(H5P_DATASET_CREATE);
dataSet = H5Dcreate(file, dataSetName, H5T_NATIVE_LLONG, dataSpace,
dataSetPropList);
void *buffer = malloc(sizeof(unsigned char) * 262144);
memset(buffer, 1, 262144);
seek_offset(dataSet, &fileDataSpace, 0, 32768, 32768);
// H5Iget_file_id in combination with H5get_libversion cause an error.
H5Iget_file_id(dataSet);
H5Dwrite(dataSet, H5T_NATIVE_LLONG, memDataSpace, fileDataSpace, xferPropList, buffer);
seek_offset(dataSet, &fileDataSpace, 32768, 32768, 32768);
H5Dwrite(dataSet, H5T_NATIVE_LLONG, memDataSpace, fileDataSpace, xferPropList, buffer);
seek_offset(dataSet, &fileDataSpace, 65536, 32768, 32768);
H5Dwrite(dataSet, H5T_NATIVE_LLONG, memDataSpace, fileDataSpace, xferPropList, buffer);
seek_offset(dataSet, &fileDataSpace, 98304, 32768, 32768);
H5Dwrite(dataSet, H5T_NATIVE_LLONG, memDataSpace, fileDataSpace, xferPropList, buffer);
H5Dclose(dataSet);
H5Pclose(dataSetPropList);
H5Sclose(dataSpace);
H5Sclose(fileDataSpace);
H5Sclose(memDataSpace);
H5Pclose(xferPropList);
H5Fclose(file);
MPI_Finalize();
return 0;
}