Problem with HDF5 1.8 + PVFS2/Lustre + MVAPICH1

Hello everyone,

I'm having some trouble getting HDF5 to work on our clusters. I've compiled
HDF5 with the following compiler settings and configure options:

export CC=mpicc
export CXX=mpicxx
export FC=mpif90

--enable-fortran --enable-parallel --disable-production --enable-debug=all

Below is a simple Fortran test program, meant to be run with two processes:

program test

  use hdf5
  implicit none
  include 'mpif.h'

  integer, parameter :: str_medium = 40
  ! group name
  character(len=str_medium) :: hdf_block_group = '/Block'
  ! dataset names
  character(len=str_medium) :: hdf_nnodes_dset = 'nnodes_gl'
  ! h5 file/group/dataset/dataspace descriptors
  integer(HSIZE_T), dimension(1) :: dset_dims,dimsf,chunk_dims,offset
  integer(HID_T) :: plist_id
  integer(HID_T) :: file_id,filespace,memspace
  integer(HID_T) :: group_id,dset_id,dspace_id
  character(len=str_medium) :: filename
  integer :: rank,ierr,i
  ! data to write
  integer :: nbl,nbl_gl,block_offset
  integer, dimension(:),pointer :: nnodes_bl
  ! mpi stuff
  integer :: info
  integer, dimension(MPI_STATUS_SIZE) :: status
  integer :: irank,iroot,nproc

  call MPI_INIT(ierr)
  call MPI_COMM_RANK(MPI_COMM_WORLD,irank,ierr)
  call MPI_COMM_SIZE(MPI_COMM_WORLD,nproc,ierr)
  irank = irank+1
  iroot = 1

  call h5open_f(ierr)

  info = MPI_INFO_NULL

  ! set up block structure
  nbl = irank*2
  filename = 'lit_restart.h5'

  allocate(nnodes_bl(nbl))
  nnodes_bl = (/(irank,i=1,nbl)/)
  nbl_gl = 6
  if(irank == iroot) then
     block_offset = 0
  else
     block_offset = 2
  end if

  ! Setup file access property list with parallel I/O access.
  call h5pcreate_f(H5P_FILE_ACCESS_F, plist_id, ierr)
  call h5pset_fapl_mpio_f(plist_id, MPI_COMM_WORLD, info, ierr)

  ! Create the file collectively.
  call h5fcreate_f(filename, H5F_ACC_TRUNC_F, file_id, ierr, &
       access_prp = plist_id)
  call h5pclose_f(plist_id, ierr)

  !**** block data *****
  call h5gcreate_f(file_id,trim(hdf_block_group),group_id,ierr)
  rank = 1
  dset_dims = (/nbl_gl/)
  chunk_dims = (/nbl/)
  offset = (/block_offset/)
  ! create dataspace for hyperslab dataset
  call h5screate_simple_f(rank,dset_dims,filespace,ierr)
  call h5dcreate_f(group_id,trim(hdf_nnodes_dset),H5T_NATIVE_INTEGER,filespace,&
       dset_id,ierr)
  call h5sclose_f(filespace, ierr)
  call h5screate_simple_f(rank,chunk_dims, memspace, ierr)
  ! select hyperslab in file
  call h5dget_space_f(dset_id,filespace,ierr)
  call h5sselect_hyperslab_f(filespace,H5S_SELECT_SET_F,offset,chunk_dims,ierr)
  ! new mpi list
  call h5pcreate_f(H5P_DATASET_XFER_F,plist_id,ierr)
  call h5pset_dxpl_mpio_f(plist_id,H5FD_MPIO_COLLECTIVE_F,ierr)
  ! write actual data
  call h5dwrite_f(dset_id,H5T_NATIVE_INTEGER,nnodes_bl,dset_dims,ierr,&
       file_space_id=filespace,mem_space_id=memspace,xfer_prp=plist_id)
  call h5sclose_f(filespace,ierr)
  call h5sclose_f(memspace,ierr)
  call h5pclose_f(plist_id,ierr)
  call h5dclose_f(dset_id,ierr)
  call h5gclose_f(group_id,ierr)
  write(*,'(2(a,i5))') 'hid_t: ',hid_t, ' hsize_t: ',hsize_t
  write(*,'(5(a,i3))') 'rank: ',irank,' nbl: ',nbl, ' dset_dims: ',dset_dims, &
       ' chunk_dims: ',chunk_dims, ' offset: ',offset

  ! close file
  call h5fclose_f(file_id,ierr)

  call h5close_f(ierr)

  call MPI_FINALIZE(ierr)

end program test
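
One thing I notice rereading the listing: none of the ierr return values are
checked. A small helper along the lines of the sketch below (check_err is a
made-up name, not an HDF5 routine) could be called after each HDF5 call to at
least pin down which call, if any, reports an error:

  ! Sketch only: hypothetical helper, not part of the program above.
  ! Aborts the whole job if an HDF5 call returned a nonzero status.
  subroutine check_err(ierr, msg, irank)
    implicit none
    include 'mpif.h'
    integer, intent(in) :: ierr, irank
    character(len=*), intent(in) :: msg
    integer :: mpierr
    if (ierr /= 0) then
       write(*,'(a,i4,2a)') 'rank ', irank, ': error in ', trim(msg)
       call MPI_ABORT(MPI_COMM_WORLD, 1, mpierr)
    end if
  end subroutine check_err

For example, call check_err(ierr, 'h5dwrite_f', irank) right after the write.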

On my workstation this program behaves as expected and the dataset is:
h5dump -d /Block/nnodes_gl lit_restart.h5
HDF5 "lit_restart.h5" {
DATASET "/Block/nnodes_gl" {
   DATATYPE H5T_STD_I32LE
   DATASPACE SIMPLE { ( 6 ) / ( 6 ) }
   DATA {
   (0): 1, 1, 2, 2, 2, 2
   }
}
}

However, on both the PVFS2 and Lustre clusters the output is:
h5dump -d /Block/nnodes_gl lit_restart.h5
HDF5 "lit_restart.h5" {
DATASET "/Block/nnodes_gl" {
   DATATYPE H5T_STD_I32LE
   DATASPACE SIMPLE { ( 6 ) / ( 6 ) }
   DATA {
   (0): 2, 2, 2, 2, 0, 0
   }
}
}

It looks as though the data from the second rank lands at offset 0 and the
first rank's contribution never makes it into the file. I ran 'make check' on
our PVFS2 cluster and got quite a few errors in the parallel tests; I've
attached the relevant output in out.debug. Have I configured HDF5 incorrectly?

Thanks,
Peter.

out.debug (21.1 KB)

Apparently, if I reverse my offsets (i.e. the root, process 1, gets an offset
of 4 and process 2 gets an offset of 0) then everything is written to the file
(it doesn't matter so much that the order is reversed, as long as it stays
consistent). I've generalized this to an arbitrary number of processes and run
it on 12 on our PVFS2 cluster, and as long as I reverse the block_offsets
everything appears to work fine...

Can someone explain this to me? Is this some C/Fortran funny business?
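
In case it helps with reproducing or narrowing this down, the one variant I
can think of is switching the transfer property list from collective to
independent MPI-IO for the same hyperslab write, to see whether the collective
path is the culprit. A sketch only (not verified on the PVFS2/Lustre machines;
it reuses the handles from the program above and passes chunk_dims, the local
buffer size, as the dims argument):

  ! Same write as above, but with independent rather than collective MPI-IO.
  call h5pcreate_f(H5P_DATASET_XFER_F, plist_id, ierr)
  call h5pset_dxpl_mpio_f(plist_id, H5FD_MPIO_INDEPENDENT_F, ierr)
  call h5dwrite_f(dset_id, H5T_NATIVE_INTEGER, nnodes_bl, chunk_dims, ierr, &
       file_space_id=filespace, mem_space_id=memspace, xfer_prp=plist_id)
  call h5pclose_f(plist_id, ierr)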
