Hi,
I am attempting to write HDF5 files in parallel but I am having problems. Depending on the compiler and HDF version I am getting different results. In some cases the data will be written out extremely slowly, in others an initial set of dumps will be quick but the code hangs when attempting to write files at a later time. And in even other cases the code runs fine but the data that has been written is corrupted, i.e. some hyperslabs contain incomplete values or values from the wrong process.
I am not sure if I am doing something wrong or if it is a problem to do with the configuration of the software or the hardware. I am not the admin on the machine that I am trying to run the code on so I can't comment too much on the details of the configuration, but I did try compiling my own version of the HDF libraries without seeing any improvement.
Below I have pasted the FORTRAN code of my routine for writing an array in parallel. A lot of it was copied from tutorials I found online but I think I understand most of it. However, I am not sure about the specific settings that I am passing to MPI or the values of sieve_buf_size etc. Could these be the cause of the problem?
Any help would be appreciated, including any leads on where to start looking for the problem.
Thanks
Holger
subroutine write_field(name, field)
  !> Collectively write the interior (non-ghost) region of the local array
  !> `field` into a dataset called "data" in a newly created HDF5 file.
  !>
  !> name  : file name to create (truncated if it already exists)
  !> field : local portion of the global field, including ghost layers
  character(LEN=*), intent(in) :: name
  ! BUG FIX: dimensions 2 and 3 previously used x_hi_g as their upper bound
  ! (y_lo_g:x_hi_g, z_lo_g:x_hi_g). Unless the grid happens to be cubic,
  ! the dummy argument then disagrees with the actual array shape AND with
  ! the memory dataspace built from locdims_g below, so the memory
  ! hyperslab selects the wrong elements -- which produces exactly the
  ! corrupted/interleaved output described above.
  double precision, dimension(x_lo_g:x_hi_g, y_lo_g:y_hi_g, z_lo_g:z_hi_g), intent(in) :: field
  ! [xyz]_lo_g and [xyz]_hi_g are the bounds of the local grid, including ghost cells
  ! [xyz]_lo   and [xyz]_hi   are the bounds of the inner domain of the local grid, excluding ghost cells
  ! N[xyz] is the size of the global domain, excluding ghost cells
  integer(HID_T) :: plist_id        ! file-access property list
  integer(HID_T) :: dxpl_id         ! dataset-transfer property list
  integer(HID_T) :: file_id
  integer(HID_T) :: space_id
  integer(HID_T) :: dset_id
  integer(HID_T) :: file_dataspace
  integer(HID_T) :: mem_dataspace
  integer :: mpi_info
  integer :: hdferr, mpierr
  integer(HSIZE_T) :: locdims(3)    ! inner (ghost-free) extent of the local block
  integer(HSIZE_T) :: locdims_g(3)  ! full local extent, including ghost cells
  integer(HSIZE_T) :: dims(3)       ! global dataset extent
  integer(HSIZE_T) :: start(3)      ! offset of this rank's block in the file
  integer(HSIZE_T) :: gstart(3)     ! offset of the inner region inside the local array
  integer(8), parameter :: sieve_buf_size = 262144
  integer(8), parameter :: align_threshold = 524288
  integer(8), parameter :: alignment = 262144

  ! --- File-access property list: MPI-IO driver plus tuning hints -------
  call H5Pcreate_f(H5P_FILE_ACCESS_F, plist_id, hdferr)
  call H5Pset_sieve_buf_size_f(plist_id, sieve_buf_size, hdferr)
  call H5Pset_alignment_f(plist_id, align_threshold, alignment, hdferr)

  ! ROMIO hints: enable collective buffering; sizes are tuning knobs only
  ! and should not affect correctness.
  call MPI_Info_create(mpi_info, mpierr)
  call MPI_Info_set(mpi_info, "access_style", "write_once", mpierr)
  call MPI_Info_set(mpi_info, "collective_buffering", "true", mpierr)
  call MPI_Info_set(mpi_info, "cb_block_size", "1048576", mpierr)
  call MPI_Info_set(mpi_info, "cb_buffer_size", "4194304", mpierr)

  ! Use the MPI-IO virtual file driver on the world communicator.
  call H5Pset_fapl_mpio_f(plist_id, MPI_COMM_WORLD, mpi_info, hdferr)

  ! Create the file collectively; every rank must make this call.
  call H5Fcreate_f(name, H5F_ACC_TRUNC_F, file_id, hdferr, H5P_DEFAULT_F, plist_id)
  ! The access template is no longer needed once the file is open.
  call H5Pclose_f(plist_id, hdferr)

  ! Dataset-transfer property list: request collective writes.
  call H5Pcreate_f(H5P_DATASET_XFER_F, dxpl_id, hdferr)
  call H5Pset_dxpl_mpio_f(dxpl_id, H5FD_MPIO_COLLECTIVE_F, hdferr)

  locdims(1) = x_hi-x_lo+1
  locdims(2) = y_hi-y_lo+1
  locdims(3) = z_hi-z_lo+1
  locdims_g(1) = x_hi_g-x_lo_g+1
  locdims_g(2) = y_hi_g-y_lo_g+1
  locdims_g(3) = z_hi_g-z_lo_g+1

  ! File offset of this rank's inner region.
  ! NOTE(review): HDF5 hyperslab offsets are 0-based. If [xyz]_lo are
  ! 1-based global indices, these should be x_lo-1 etc. -- verify against
  ! the code's global index convention.
  start(1) = x_lo
  start(2) = y_lo
  start(3) = z_lo
  ! Offset of the inner region inside the local (ghosted) array.
  gstart = ghost

  ! ************************************************
  ! Beginning of parallel HDF output
  dims(1) = Nx
  dims(2) = Ny
  dims(3) = Nz

  ! Dataspace with the global dimensions; dataset creation is collective.
  call H5Screate_simple_f(3, dims, space_id, hdferr)
  call H5Dcreate_f(file_id, "data", H5T_NATIVE_DOUBLE, space_id, dset_id, &
                   hdferr)

  ! Copy of the file dataspace: we modify it by selecting the hyperslab
  ! that this rank writes into.
  call H5Dget_space_f(dset_id, file_dataspace, hdferr)
  call H5Sselect_hyperslab_f(file_dataspace, H5S_SELECT_SET_F, &
                             start, locdims, hdferr)

  ! Memory dataspace spanning the full local array (with ghosts), then
  ! select only the inner region so ghost cells are excluded from the write.
  call H5Screate_simple_f(3, locdims_g, mem_dataspace, hdferr)
  call H5Sselect_hyperslab_f(mem_dataspace, H5S_SELECT_SET_F, &
                             gstart, locdims, hdferr)

  ! Write collectively (dxpl_id requested H5FD_MPIO_COLLECTIVE_F above).
  call H5Dwrite_f(dset_id, &
                  H5T_NATIVE_DOUBLE, &
                  field, &
                  locdims, &
                  hdferr, &
                  mem_dataspace, &
                  file_dataspace, &
                  dxpl_id)

  ! Release all HDF5 handles. NOTE(review): space_id and dxpl_id were
  ! never closed in the original; close them too to avoid leaking handles
  ! across repeated dumps (leaked handles can degrade or stall later writes).
  call H5Sclose_f(mem_dataspace, hdferr)
  call H5Sclose_f(file_dataspace, hdferr)
  call H5Sclose_f(space_id, hdferr)
  call H5Pclose_f(dxpl_id, hdferr)
  call H5Dclose_f(dset_id, hdferr)
  call H5Fclose_f(file_id, hdferr)
end subroutine write_field