HDF5 bug? H5FD_MPIO_COLLECTIVE + chunking

We see a strange bug with HDF5 in our simulation code PIConGPU; see "HDF5: Field "Kinks" with FreeFormula" (Issue #2841 in ComputationalRadiationPhysics/picongpu on GitHub).

Combining chunking with H5FD_MPIO_COLLECTIVE on 16 MPI ranks writes wrong data (black artifacts in the image).

compile:

  • broken: mpicc -g main.cpp -lhdf5 -L$HDF5_ROOT/lib && mpiexec -n 16 ./a.out
  • fix: mpicc -g main.cpp -lhdf5 -L$HDF5_ROOT/lib -DFIX && mpiexec -n 16 ./a.out
#include <mpi.h>
#include <hdf5.h>

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define X 1872llu
#define Y 1872llu

/**
 * Write this rank's Y x X tile of `data` into the chunked 2-D float dataset
 * "dataset1" of a parallel HDF5 file named "<len>.h5".
 *
 * The global dataset is (4*Y) x (4*X); the tile of a rank is placed at
 * row block rank/4 and column block rank%4, i.e. the layout is a 4 x 4
 * grid of ranks. The transfer is collective unless FIX is defined, in which
 * case independent MPI-I/O is used.
 *
 * @param comm  communicator passed to the MPI-I/O file driver
 * @param info  MPI info object passed to the MPI-I/O file driver
 * @param data  source buffer holding at least X*Y floats in row-major order
 * @param len   number of floats in `data`; also used to build the file name
 * @param rank  rank of the caller, used to compute the tile offset
 * @return 0 on success, -1 if an argument is unusable or any HDF5 call fails
 *
 * NOTE(review): several of the HDF5 calls below are expected to be collective
 * over `comm`; if only some ranks take the error path the remaining ranks may
 * block. Callers that need to survive that should abort the whole job.
 */
int write_HDF5(
    MPI_Comm const comm, MPI_Info const info,
    float* data, size_t len, int rank)
{
    // Everything is declared before the first goto so that the jump to
    // `cleanup` never bypasses an initialisation (required when this file
    // is compiled as C++).
    int rc = -1;
    int name_len = 0;
    const char* what = "";
    char file_name[100];

    // Handles start out invalid so cleanup can tell which ones were opened.
    hid_t plist_id = -1;
    hid_t file_id = -1;
    hid_t srcSize = -1;
    hid_t filespace = -1;
    hid_t datasetCreationProperty = -1;
    hid_t dset_id = -1;
    hid_t dset_plist_id = -1;
    hid_t dd = -1;

    // dataspace geometry (rank-2, so two entries each)
    hsize_t dims[2] = {Y, X};
    hsize_t globalDims[2] = {Y * 4, X * 4};
    hsize_t offset[2] = {rank/4 * Y, rank%4 * X};
    hsize_t chunk[2] = {128, 128};

    // H5Dwrite reads a full Y x X tile from data; refuse shorter buffers.
    if (data == NULL || (unsigned long long)len < X * Y)
    {
        fprintf(stderr, "%i: source buffer (%zu floats) is smaller than one tile\n",
                rank, len);
        return -1;
    }

    // file name "<len>.h5", bounded and checked for truncation
    name_len = snprintf(file_name, sizeof file_name, "%zu.h5", len);
    if (name_len < 0 || (size_t)name_len >= sizeof file_name)
    {
        fprintf(stderr, "%i: could not build file name\n", rank);
        return -1;
    }

    // property list with the MPI-I/O driver
    what = "create the file access property list";
    plist_id = H5Pcreate(H5P_FILE_ACCESS);
    if (plist_id < 0)
    {
        goto cleanup;
    }

    what = "select the MPI-I/O file driver";
    if (H5Pset_fapl_mpio(plist_id, comm, info) < 0)
    {
        goto cleanup;
    }

    // file create
    what = "create the file";
    file_id = H5Fcreate(file_name, H5F_ACC_TRUNC,
                        H5P_DEFAULT, plist_id);
    if (file_id < 0)
    {
        goto cleanup;
    }

    // memory dataspace (one tile) and file dataspace (whole domain,
    // maximum size equal to the current size)
    what = "create the memory dataspace";
    srcSize = H5Screate_simple(2, dims, NULL);
    if (srcSize < 0)
    {
        goto cleanup;
    }

    what = "create the file dataspace";
    filespace = H5Screate_simple(2, globalDims, globalDims);
    if (filespace < 0)
    {
        goto cleanup;
    }

    // hsize_t is not guaranteed to be unsigned long long, so cast for %llu
    printf("%i: %llu,%llu %llu,%llu \n", rank,
           (unsigned long long)offset[0], (unsigned long long)offset[1],
           (unsigned long long)globalDims[0], (unsigned long long)globalDims[1]);

    // chunking
    what = "create the dataset creation property list";
    datasetCreationProperty = H5Pcreate(H5P_DATASET_CREATE);
    if (datasetCreationProperty < 0)
    {
        goto cleanup;
    }

    what = "set the chunk size";
    if (H5Pset_chunk(datasetCreationProperty, 2, chunk) < 0)
    {
        goto cleanup;
    }

    // dataset
    what = "create the dataset";
    dset_id = H5Dcreate(file_id, "dataset1", H5T_NATIVE_FLOAT,
                        filespace, H5P_DEFAULT,
                        datasetCreationProperty, H5P_DEFAULT);
    if (dset_id < 0)
    {
        goto cleanup;
    }

    // transfer property list
    what = "create the transfer property list";
    dset_plist_id = H5Pcreate(H5P_DATASET_XFER);
    if (dset_plist_id < 0)
    {
        goto cleanup;
    }

    what = "set the MPI-I/O transfer mode";
#ifdef FIX
    if (H5Pset_dxpl_mpio(dset_plist_id, H5FD_MPIO_INDEPENDENT) < 0) // default
#else
    if (H5Pset_dxpl_mpio(dset_plist_id, H5FD_MPIO_COLLECTIVE) < 0)
#endif
    {
        goto cleanup;
    }

    // select this rank's tile inside the file dataspace
    what = "get the dataset's dataspace";
    dd = H5Dget_space(dset_id);
    if (dd < 0)
    {
        goto cleanup;
    }

    what = "select the hyperslab";
    if (H5Sselect_hyperslab(dd, H5S_SELECT_SET, offset,
                            NULL, dims, NULL) < 0)
    {
        goto cleanup;
    }

    // write
    what = "write the dataset";
    if (H5Dwrite(dset_id, H5T_NATIVE_FLOAT,
                 srcSize, dd, dset_plist_id, data) < 0)
    {
        goto cleanup;
    }

    rc = 0;

cleanup:
    if (rc != 0)
    {
        fprintf(stderr, "%i: failed to %s\n", rank, what);
    }

    // close everything that was opened, including the handles the
    // original version leaked (dataspaces and the creation property list)
    if (dd >= 0)
    {
        H5Sclose(dd);
    }
    if (dset_plist_id >= 0)
    {
        H5Pclose(dset_plist_id);
    }
    if (dset_id >= 0 && H5Dclose(dset_id) < 0)
    {
        rc = -1;
    }
    if (datasetCreationProperty >= 0)
    {
        H5Pclose(datasetCreationProperty);
    }
    if (filespace >= 0)
    {
        H5Sclose(filespace);
    }
    if (srcSize >= 0)
    {
        H5Sclose(srcSize);
    }
    // a failing file close means the data may not have reached the file
    if (file_id >= 0 && H5Fclose(file_id) < 0)
    {
        rc = -1;
    }
    if (plist_id >= 0)
    {
        H5Pclose(plist_id);
    }

    return rc;
}

int main(int argc, char* argv[])
{

    MPI_Comm comm = MPI_COMM_WORLD; 
    MPI_Info info = MPI_INFO_NULL;  

    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
    size_t lengths[1] = {X*Y};
    for( size_t i = 0; i < 1; ++i )
    {
        size_t len = lengths[i];
        printf("Writing for len=%zu ...\n", len);
        float* data = (float*)malloc(len * sizeof(float));
        for( size_t y = 0; y < Y; ++y)
            for( size_t x = 0; x < X; ++x)
                data[y * Y + x] = 100.f + y%1024;
    
        write_HDF5(comm, info, data, len, rank);
        free(data);
        printf("Finished write for len=%zu ...\n", len);
    }
    
    MPI_Finalize();

    return 0;
}

Software:

  • gcc (GCC) 5.3.0
  • hdf5-parallel 1.8.2 and 1.10.4
  • openmpi/2.1.2 compiled with CUDA8
  • CUDA 8
  • Ubuntu 14.04.1

Update: I changed the example code so that the global domain size is always a multiple of 4; previously it might not have been, which resulted in an incompletely initialized input array on each MPI rank.