Hello everyone,
I'm having some trouble getting HDF5 to work on our clusters. I've compiled
hdf5 with the following options:
export CC=mpicc
export CXX=mpicxx
export FC=mpif90
--enable-fortran --enable-parallel --disable-production --enable-debug=all
I wrote a simple fortran test program to be run with two processors below:
program test
  ! Minimal parallel-HDF5 test: each MPI rank writes its own block of the
  ! 1-D integer dataset '/Block/nnodes_gl' through a hyperslab selection,
  ! using collective MPI-IO. Intended to be run with two processes:
  ! rank 1 owns entries 1-2 (value 1), rank 2 owns entries 3-6 (value 2).
  use hdf5
  implicit none
  include 'mpif.h'

  integer, parameter :: str_medium = 40

  ! group name
  character(len=str_medium) :: hdf_block_group = '/Block'
  ! dataset name
  character(len=str_medium) :: hdf_nnodes_dset = 'nnodes_gl'

  ! h5 file/group/dataset/dataspace descriptors
  integer(HSIZE_T), dimension(1) :: dset_dims, chunk_dims, offset
  integer(HID_T) :: plist_id
  integer(HID_T) :: file_id, filespace, memspace
  integer(HID_T) :: group_id, dset_id
  character(len=str_medium) :: filename
  integer :: rank, ierr, i

  ! data to write
  integer :: nbl, nbl_gl, block_offset
  integer, dimension(:), allocatable :: nnodes_bl

  ! mpi stuff
  integer :: info
  integer :: irank, iroot, nproc

  call MPI_INIT(ierr)
  call MPI_COMM_RANK(MPI_COMM_WORLD, irank, ierr)
  call MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, ierr)
  irank = irank + 1          ! convert to a 1-based rank index
  iroot = 1

  call h5open_f(ierr)
  info = MPI_INFO_NULL

  ! set up block structure: rank 1 holds 2 entries, rank 2 holds 4
  nbl = irank*2
  filename = 'lit_restart.h5'
  allocate(nnodes_bl(nbl))
  nnodes_bl = (/(irank, i=1, nbl)/)
  nbl_gl = 6
  if (irank == iroot) then
     block_offset = 0
  else
     block_offset = 2
  end if

  ! Setup file access property list with parallel I/O access.
  call h5pcreate_f(H5P_FILE_ACCESS_F, plist_id, ierr)
  call h5pset_fapl_mpio_f(plist_id, MPI_COMM_WORLD, info, ierr)

  ! Create the file collectively.
  call h5fcreate_f(filename, H5F_ACC_TRUNC_F, file_id, ierr, &
       access_prp=plist_id)
  call h5pclose_f(plist_id, ierr)

  !**** block data *****
  call h5gcreate_f(file_id, trim(hdf_block_group), group_id, ierr)
  rank = 1
  dset_dims  = (/nbl_gl/)        ! global dataset extent
  chunk_dims = (/nbl/)           ! this rank's local extent
  offset     = (/block_offset/)  ! this rank's starting index in the file

  ! create the (global) dataspace and the dataset itself
  call h5screate_simple_f(rank, dset_dims, filespace, ierr)
  call h5dcreate_f(group_id, trim(hdf_nnodes_dset), H5T_NATIVE_INTEGER, &
       filespace, dset_id, ierr)
  call h5sclose_f(filespace, ierr)

  ! memory dataspace describing the local buffer
  call h5screate_simple_f(rank, chunk_dims, memspace, ierr)

  ! select this rank's hyperslab in the file dataspace
  call h5dget_space_f(dset_id, filespace, ierr)
  call h5sselect_hyperslab_f(filespace, H5S_SELECT_SET_F, offset, &
       chunk_dims, ierr)

  ! transfer property list requesting collective MPI-IO
  call h5pcreate_f(H5P_DATASET_XFER_F, plist_id, ierr)
  call h5pset_dxpl_mpio_f(plist_id, H5FD_MPIO_COLLECTIVE_F, ierr)

  ! write the data.
  ! BUG FIX: the 'dims' argument of h5dwrite_f must describe the *memory*
  ! buffer being written (chunk_dims, nbl elements per rank), not the
  ! full dataset extent (dset_dims); the original passed dset_dims, which
  ! over-describes the local buffer.
  call h5dwrite_f(dset_id, H5T_NATIVE_INTEGER, nnodes_bl, chunk_dims, ierr, &
       file_space_id=filespace, mem_space_id=memspace, xfer_prp=plist_id)

  call h5sclose_f(filespace, ierr)
  call h5sclose_f(memspace, ierr)
  call h5pclose_f(plist_id, ierr)
  call h5dclose_f(dset_id, ierr)
  call h5gclose_f(group_id, ierr)

  ! diagnostic output: kind parameters and per-rank decomposition
  write(*,'(2(a,i5))') 'hid_t: ', hid_t, ' hsize_t: ', hsize_t
  write(*,'(5(a,i3))') 'rank: ', irank, ' nbl: ', nbl, &
       ' dset_dims: ', dset_dims, &
       ' chunk_dims: ', chunk_dims, ' offset: ', offset

  ! close file and HDF5 library, then shut down MPI
  call h5fclose_f(file_id, ierr)
  call h5close_f(ierr)
  deallocate(nnodes_bl)
  call MPI_FINALIZE(ierr)
end program test
On my workstation this program behaves appropriately and the data set is:
h5dump -d /Block/nnodes_gl lit_restart.h5
HDF5 "lit_restart.h5" {
DATASET "/Block/nnodes_gl" {
DATATYPE H5T_STD_I32LE
DATASPACE SIMPLE { ( 6 ) / ( 6 ) }
DATA {
(0): 1, 1, 2, 2, 2, 2
}
}
}
However, on both the PVFS2 and Lustre clusters the output is:
h5dump -d /Block/nnodes_gl lit_restart.h5
HDF5 "lit_restart.h5" {
DATASET "/Block/nnodes_gl" {
DATATYPE H5T_STD_I32LE
DATASPACE SIMPLE { ( 6 ) / ( 6 ) }
DATA {
(0): 2, 2, 2, 2, 0, 0
}
}
}
I ran 'make check' on our pvfs2 cluster and got quite a few errors on the
parallel section of things. I've attached the relevant output in
out.debug. Have I configured hdf5 incorrectly?
Thanks,
Peter.
out.debug (21.1 KB)