Thank you very much for help! It started to work, but I still have some problems. Here is my updated code for reference. It has the same structure as before: MASTER creates file, creates datasets, creates attributes for every dataset and writes all attributes. Then, in a loop, every core writes its own data, but all cores participate in collective writing. I am using call h5sselect_none_f(filespace, ierr) in a loop for cores which should not write data for particular dataset (as you suggested), so every core writes only its own data to its own dataset.
! =========================================================
subroutine out3(maxmx,maxmy,maxmz,meqn,mbc,mx,my,&
& mz,xlower,ylower,zlower,dx,dy,dz,q,t,iframe)
! =========================================================
use mpi
use hdf5
implicit double precision (a-h,o-z)
!------------------ HDF variables ------------------!
integer(hid_t) :: plist_id ! property list identifier
integer(hid_t) :: dcpl ! property list identifier
integer(hid_t) :: file_id ! file identifier
integer(hid_t) :: dataset_id ! dataset identifier
integer(hid_t) :: dataspace_id ! dataspace identifier
double precision, allocatable :: attr_data(:,:) ! data to write
double precision, dimension(17) :: attr_datacur
INTEGER(HSIZE_T), DIMENSION(1) :: adims = (/17/) ! Attribute dimension
INTEGER(HSIZE_T), DIMENSION(1) :: data_dims
INTEGER(HID_T) :: attr_id ! Attribute identifier
INTEGER(HID_T) :: aspace_id ! Attribute dataspace identifier
INTEGER(HID_T) :: atype_id ! Attribute dataspace identifier
INTEGER :: arank = 1 ! Attribute rank
integer(hid_t) :: filespace, memspace, memd
!---------------------------------------------------!
!------------------ Filter variables ---------------!
double precision, allocatable, target :: data(:,:,:,:) ! data
integer(hsize_t), dimension(4) :: cdims = (/1,1,1,1/) ! chunks data dimensions
!---------------------------------------------------!
!------------------ miscellaneous ------------------!
character(len=3) :: c ! dataset name for specific rank
character(len=10) :: dataset_name
integer :: rank = 4 ! data rank. q is 4D
character(mpi_max_processor_name) hostname
dimension q(1-mbc:maxmx+mbc, 1-mbc:maxmy+mbc,1-mbc:maxmz+mbc,meqn)
integer(hsize_t), dimension(4) :: dimsf ! data dataset dimensions
integer :: i,j,k,l,m,info,idd
character*20 fname
common /mpicomm/ mpi_comm_3d, lx, ly, lz
common /mpi_proc_info/ np, id
!---------------------------------------------------!
call h5open_f(ierr)
dimsf(1) = mx
dimsf(2) = my
dimsf(3) = mz
dimsf(4) = meqn
allocate (data(dimsf(1),dimsf(2),dimsf(3),dimsf(4)))
allocate (attr_data(3,np))
info = mpi_info_null
fname = 'fort.q' &
& // char(ichar('0') + mod(iframe/1000,10)) &
& // char(ichar('0') + mod(iframe/100,10)) &
& // char(ichar('0') + mod(iframe/10,10)) &
& // char(ichar('0') + mod(iframe,10)) &
& // '.h5'
do k=1,mz
do j=1,my
do i=1,mx
do m=1,meqn
if (abs(q(i,j,k,m)) .lt. 1d-99) then
q(i,j,k,m) = 0.d0
end if
data(i,j,k,m) = q(i,j,k,m)
end do
end do
end do
end do
! have id 0 creates hdf5 file, data layout and write all attributes
if (id == 0) then
call h5tcopy_f(h5t_native_double,atype_id,ierr)
cdims(1) = dimsf(1)
cdims(2) = dimsf(2)
cdims(3) = dimsf(3)
cdims(4) = int(meqn/2)
idd = 1
do i=0,lx-1
do j=0,ly-1
!do kk=0,lz-1
attr_data(1,idd) = i*mx*dx
attr_data(2,idd) = j*my*dy
attr_data(3,idd) = k*mz*dz
idd = idd + 1
end do
end do
call h5screate_simple_f(arank,adims,aspace_id, ierr)
call h5fcreate_f(fname, h5f_acc_trunc_f, file_id, ierr)
call h5screate_simple_f(rank, dimsf, dataspace_id, ierr)
call h5pcreate_f(h5p_dataset_create_f, dcpl, ierr)
call h5pset_chunk_f(dcpl, 4, cdims, ierr)
call h5pset_deflate_f(dcpl, 6, ierr)
call h5pset_alloc_time_f(dcpl, h5d_alloc_time_early_f, ierr)
!call h5pset_alloc_time_f(dcpl, H5D_ALLOC_TIME_INCR_f, ierr)
attr_datacur(1) = gridno
attr_datacur(2) = level
attr_datacur(3) = mx
attr_datacur(4) = my
attr_datacur(5) = mz
attr_datacur(9) = dx
attr_datacur(10) = dy
attr_datacur(11) = dz
attr_datacur(12) = t
attr_datacur(13) = iframe
attr_datacur(14) = meqn
attr_datacur(15) = lx
attr_datacur(16) = ly
attr_datacur(17) = lz
do i=1,np
attr_datacur(6) = attr_data(1,i) ! xlower
attr_datacur(7) = attr_data(2,i) ! ylower
attr_datacur(8) = attr_data(3,i) ! zlower
write(c,"(i0)") i
dataset_name = "Pid" // trim(c)
call h5dcreate_f(file_id, dataset_name, h5t_native_double, &
dataspace_id, dataset_id, ierr, dcpl_id=dcpl)
call h5acreate_f(dataset_id,"Parameters",atype_id,aspace_id,attr_id, ierr)
data_dims(1) = 17
call h5awrite_f(attr_id, atype_id, attr_datacur, data_dims, ierr)
call h5aclose_f(attr_id, ierr)
call h5dclose_f(dataset_id, ierr)
enddo
call h5sclose_f(aspace_id, ierr)
call h5tclose_f(atype_id, ierr)
call h5sclose_f(dataspace_id, ierr)
call h5pclose_f(dcpl, ierr)
call h5fclose_f(file_id, ierr)
end if
call mpi_barrier(mpi_comm_3d, ierr)
call h5pcreate_f(h5p_file_access_f, plist_id, ierr)
call h5pset_fapl_mpio_f(plist_id, mpi_comm_3d, info, ierr)
call h5fopen_f(fname, h5f_acc_rdwr_f, file_id, ierr, plist_id)
call h5pclose_f(plist_id, ierr)
! h5p_dataset_xfer_f: property for raw data transfer
call h5pcreate_f(h5p_dataset_xfer_f, plist_id, ierr)
call h5pset_dxpl_mpio_f(plist_id, h5fd_mpio_collective_f, ierr)
do i=1,np
write(c,"(i0)") i
dataset_name = "Pid" // trim(c)
call h5dopen_f(file_id, dataset_name, dataset_id, ierr)
call h5dget_space_f(dataset_id,filespace,ierr)
if (id /= i-1) then
call h5sselect_none_f(filespace, ierr)
end if
call h5dwrite_f(dataset_id, h5t_native_double, data, &
& dimsf, ierr, file_space_id=filespace, xfer_prp = plist_id)
call h5sclose_f(filespace, ierr)
call h5dclose_f(dataset_id,ierr)
enddo
call h5pclose_f(plist_id, ierr)
call h5fclose_f(file_id, ierr)
call h5close_f(ierr)
return
end
After 67 successful outputs (67 files are written correctly with chunking and compression and to every dataset) I start to get errors like:
HDF5-DIAG: Error detected in HDF5 (1.10.3) MPI-process 72:
#000: H5Dio.c line 336 in H5Dwrite(): can't write data
major: Dataset
minor: Write failed
#001: H5Dio.c line 828 in H5D__write(): can't write data
major: Dataset
minor: Write failed
#002: H5Dmpio.c line 893 in H5D__chunk_collective_write(): write error
major: Dataspace
minor: Write failed
#003: H5Dmpio.c line 811 in H5D__chunk_collective_io(): couldn't finish filtered linked chunk MPI-IO
major: Low-level I/O
minor: Can't get value
#004: H5Dmpio.c line 1308 in H5D__link_chunk_filtered_collective_io(): couldn't construct filtered I/O info list
major: Dataset
minor: Unable to initialize object
#005: H5Dmpio.c line 2608 in H5D__construct_filtered_io_info_list(): unable to redistribute shared chunks
major: Dataset
minor: Write failed
#006: H5Dmpio.c line 2695 in H5D__chunk_redistribute_shared_chunks(): can't get fapl
major: Dataset
minor: Can't get value
#007: H5Fint.c line 203 in H5F_get_access_plist(): can't set file driver ID & info
major: Property lists
minor: Can't set value
#008: H5Pint.c line 3043 in H5P_set(): can't operate on plist to set value
major: Property lists
minor: Can't operate on object
#009: H5Pint.c line 2641 in H5P__do_prop(): can't operate on property
major: Property lists
minor: Can't operate on object
#010: H5Pint.c line 2883 in H5P__set_plist_cb(): can't set property value
major: Property lists
minor: Unable to initialize object
#011: H5Pfapl.c line 1182 in H5P__facc_file_driver_set(): can't copy file driver
major: Property lists
minor: Unable to copy object
#012: H5Pfapl.c line 1054 in H5P__file_driver_copy(): driver info copy failed
major: Property lists
minor: Unable to copy object
#013: H5FDmpio.c line 752 in H5FD_mpio_fapl_copy(): Communicator/Info duplicate failed
major: Internal error (too specific to document in detail)
minor: Unable to copy object
#014: H5FDmpi.c line 309 in H5FD_mpi_comm_info_dup(): MPI_Comm_dup failed
major: Internal error (too specific to document in detail)
minor: Some MPI function failed
#015: H5FDmpi.c line 309 in H5FD_mpi_comm_info_dup(): Other MPI error, error stack:
PMPI_Comm_dup(192)..................: MPI_Comm_dup(comm=0xc4000005, new_comm=0x7ffffec9f210) failed
PMPI_Comm_dup(171)..................: fail failed
MPIR_Comm_dup_impl(57)..............: fail failed
MPIR_Comm_copy(838).................: fail failed
MPIR_Get_contextid_sparse_group(676): Too many communicators (0/16384 free on this process; ignore_id=0)
major: Internal error (too specific to document in detail)
minor: MPI Error String
It seems that the problem is with “Too many communicators” (I have 240 cores and my simulation stops at 68th frame what is close to number of communicators in errror (240*68~=16384)) , but I am not sure why all communicators are busy, because everything is closed (all files, all datasets, dataspaces, property lists etc). Code worked fine without collective writing in a loop, so I think the problem is with filespace which somehow doesn’t close after writing. Do I use call h5sselect_none_f(filespace, ierr) properly here? I also tried to close filespace outside the loop, but it doesn’t help.
Thank you in advance.