HDF5: Field "Kinks" with FreeFormula (ComputationalRadiationPhysics/picongpu#2841)
We see a strange bug with HDF5 and our simulation code PIConGPU.
Chunking + H5FD_MPIO_COLLECTIVE with 16 MPI ranks writes wrong data (black artifacts in the image).
compile:
- broken:
mpicc -g main.cpp -lhdf5 -L$HDF5_ROOT/lib && mpiexec -n 16 ./a.out
- fix:
mpicc -g main.cpp -lhdf5 -L$HDF5_ROOT/lib -DFIX && mpiexec -n 16 ./a.out
#include <mpi.h>
#include <hdf5.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#define X 1872llu
#define Y 1872llu
int write_HDF5(
MPI_Comm const comm, MPI_Info const info,
float* data, size_t len, int rank)
{
// property list
hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
// MPI-I/O driver
H5Pset_fapl_mpio(plist_id, comm, info);
// file create
char file_name[100];
sprintf(file_name, "%zu", len);
strcat(file_name, ".h5");
hid_t file_id = H5Fcreate(file_name, H5F_ACC_TRUNC,
H5P_DEFAULT, plist_id);
// dataspace
hsize_t dims[2] = {Y, X};
hsize_t globalDims[2] = {Y * 4, X * 4};
hsize_t max_dims[2] = {Y * 4, X * 4};
hsize_t offset[2] = {rank/4 * Y, rank%4 * X};
hid_t srcSize = H5Screate_simple(2, dims, NULL);
hid_t filespace = H5Screate_simple(2,
globalDims,
max_dims);
printf("%i: %llu,%llu %llu,%llu \n", rank,offset[0],offset[1],globalDims[0],globalDims[1]);
// chunking
hsize_t chunk[2] = {128, 128};
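// note: 1872 is not a multiple of 128 (1872 = 14*128 + 80), so the
// per-rank 1872x1872 tiles do not line up with the chunk boundaries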
hid_t datasetCreationProperty = H5Pcreate(H5P_DATASET_CREATE);
H5Pset_chunk(datasetCreationProperty, 2, chunk);
// dataset
hid_t dset_id = H5Dcreate(file_id, "dataset1", H5T_NATIVE_FLOAT,
filespace, H5P_DEFAULT,
datasetCreationProperty, H5P_DEFAULT);
// write
hid_t dset_plist_id = H5Pcreate(H5P_DATASET_XFER);
#ifdef FIX
H5Pset_dxpl_mpio(dset_plist_id, H5FD_MPIO_INDEPENDENT); // independent I/O is the HDF5 default
#else
H5Pset_dxpl_mpio(dset_plist_id, H5FD_MPIO_COLLECTIVE);
#endif
hid_t dd = H5Dget_space(dset_id);
H5Sselect_hyperslab(dd, H5S_SELECT_SET, offset,
NULL, dims, NULL);
herr_t status;
status = H5Dwrite(dset_id, H5T_NATIVE_FLOAT,
srcSize, dd, dset_plist_id, data);
// close all (including the dataspaces, which were leaked before)
status = H5Sclose(srcSize);
status = H5Sclose(filespace);
status = H5Sclose(dd);
status = H5Pclose(plist_id);
status = H5Pclose(dset_plist_id);
status = H5Dclose(dset_id);
status = H5Fclose(file_id);
return 0;
}
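/*
 * Optional read-back check (a sketch, not part of the original
 * reproducer): rank 0 re-opens the file serially after the parallel
 * write and counts values that differ from the fill pattern written
 * in main() below. It reads the full 4Y x 4X dataset into one buffer
 * (~224 MB for the sizes above); call it e.g. from rank 0 after
 * write_HDF5() and an MPI_Barrier(comm).
 */
int verify_HDF5(size_t len)
{
    char file_name[100];
    sprintf(file_name, "%zu.h5", len);
    hid_t file_id = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT);
    hid_t dset_id = H5Dopen(file_id, "dataset1", H5P_DEFAULT);
    size_t const gx = X * 4, gy = Y * 4;
    float* buf = (float*)malloc(gx * gy * sizeof(float));
    // memory layout matches the file layout, so H5S_ALL suffices
    H5Dread(dset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buf);
    size_t bad = 0;
    for (size_t y = 0; y < gy; ++y)
        for (size_t x = 0; x < gx; ++x)
        {
            // every rank writes 100 + (local row) % 1024
            float const expected = 100.f + (y % Y) % 1024;
            if (buf[y * gx + x] != expected)
                ++bad;
        }
    printf("verify: %zu wrong values\n", bad);
    free(buf);
    H5Dclose(dset_id);
    H5Fclose(file_id);
    return bad != 0;
}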
int main(int argc, char* argv[])
{
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Info info = MPI_INFO_NULL;
MPI_Init(&argc, &argv);
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
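/* guard (a sketch, not in the original code): the 4x4 decomposition
 * in write_HDF5 assumes exactly 16 ranks */
int nranks;
MPI_Comm_size(MPI_COMM_WORLD, &nranks);
if (nranks != 16)
{
    if (rank == 0)
        fprintf(stderr, "this reproducer needs exactly 16 ranks\n");
    MPI_Finalize();
    return 1;
}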
size_t lengths[1] = {X*Y};
for( size_t i = 0; i < 1; ++i )
{
size_t len = lengths[i];
printf("Writing for len=%zu ...\n", len);
float* data = (float*)malloc(len * sizeof(float));
for( size_t y = 0; y < Y; ++y)
for( size_t x = 0; x < X; ++x)
data[y * X + x] = 100.f + y%1024; // row stride is X, not Y
write_HDF5(comm, info, data, len, rank);
free(data);
printf("Finished write for len=%zu ...\n", len);
}
MPI_Finalize();
return 0;
}
Software:
- gcc (GCC) 5.3.0
- hdf5-parallel 1.8.2 and 1.10.4
- openmpi/2.1.2 compiled with CUDA8
- CUDA 8
- Ubuntu 14.04.1
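Since both hdf5-parallel versions show the issue, it can help to confirm which library the binary actually linked against. A minimal check (my addition, not part of the reproducer; it could go at the top of main() after MPI_Init):
unsigned maj, min, rel;
H5get_libversion(&maj, &min, &rel);
if (rank == 0)
    printf("linked against HDF5 %u.%u.%u\n", maj, min, rel);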
Update: I updated the example code so that the global domain size is always a multiple of 4; otherwise the input array of each MPI rank was not fully initialized.