Hi,
we are experiencing problems writing “large” 1D datasets on 64-bit Linux systems: the write fails as soon as a single MPI rank writes more than 512 MByte, i.e. more than 134217728 four-byte int/float elements (134217728 × 4 bytes = 512 MByte) in a 1D dataspace.
To reproduce, please see the following single-MPI-rank example, run with HDF5 1.10.4 and OpenMPI 3.1.3 on a 64-bit Debian Linux 9.5 system with 16 GByte of RAM.
#include <mpi.h>
#include <hdf5.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

int write_HDF5(
    MPI_Comm const comm, MPI_Info const info,
    int* data, size_t len)
{
    // property list
    hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
    // MPI-I/O driver
    H5Pset_fapl_mpio(plist_id, comm, info);

    // file create
    char file_name[100];
    snprintf(file_name, sizeof(file_name), "%zu.h5", len);
    hid_t file_id = H5Fcreate(file_name, H5F_ACC_TRUNC,
                              H5P_DEFAULT, plist_id);

    // dataspace
    hsize_t dims[1] = {len};
    hsize_t max_dims[1] = {len};
    // hsize_t* max_dims = NULL;
    hid_t filespace = H5Screate_simple(1,
                                       dims,
                                       max_dims);

    // dataset creation property list (contiguous layout, no chunking set)
    hid_t datasetCreationProperty = H5Pcreate(H5P_DATASET_CREATE);

    // dataset
    hid_t dset_id = H5Dcreate(file_id, "dataset1", H5T_NATIVE_INT,
                              filespace, H5P_DEFAULT,
                              datasetCreationProperty, H5P_DEFAULT);

    // write (H5S_ALL as memory space reuses the file dataspace selection)
    hid_t dset_plist_id = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio(dset_plist_id, H5FD_MPIO_COLLECTIVE);
    // H5Pset_dxpl_mpio(dset_plist_id, H5FD_MPIO_INDEPENDENT); // default

    herr_t status;
    status = H5Dwrite(dset_id, H5T_NATIVE_INT,
                      H5S_ALL, filespace, dset_plist_id, data);

    // close all
    status = H5Pclose(plist_id);
    status = H5Pclose(dset_plist_id);
    status = H5Pclose(datasetCreationProperty);
    status = H5Dclose(dset_id);
    status = H5Sclose(filespace);
    status = H5Fclose(file_id);

    return 0;
}
int main(int argc, char* argv[])
{
    MPI_Comm comm = MPI_COMM_WORLD;
    MPI_Info info = MPI_INFO_NULL;

    MPI_Init(&argc, &argv);

    size_t lengths[3] = {134217727u, 134217728u, 134217729u};
    for( size_t i = 0; i < 3; ++i )
    {
        size_t len = lengths[i];
        printf("Writing for len=%zu ...\n", len);

        int* data = malloc(len * sizeof(int));
        for( size_t k = 0; k < len; ++k )
            data[k] = 420;

        write_HDF5(comm, info, data, len);
        free(data);

        printf("Finished write for len=%zu ...\n", len);
    }

    MPI_Finalize();
    return 0;
}
$ h5pcc phdf5.c && ./a.out
Writing for len=134217727 ...
Finished write for len=134217727 ...
Writing for len=134217728 ...
Finished write for len=134217728 ...
Writing for len=134217729 ...
HDF5-DIAG: Error detected in HDF5 (1.10.4) MPI-process 0:
#000: H5Dio.c line 336 in H5Dwrite(): can't write data
major: Dataset
minor: Write failed
#001: H5Dio.c line 828 in H5D__write(): can't write data
major: Dataset
minor: Write failed
#002: H5Dmpio.c line 671 in H5D__contig_collective_write(): couldn't finish shared collective MPI-IO
major: Low-level I/O
minor: Write failed
#003: H5Dmpio.c line 2013 in H5D__inter_collective_io(): couldn't finish collective MPI-IO
major: Low-level I/O
minor: Can't get value
#004: H5Dmpio.c line 2057 in H5D__final_collective_io(): optimized write failed
major: Dataset
minor: Write failed
#005: H5Dmpio.c line 426 in H5D__mpio_select_write(): can't finish collective parallel write
major: Low-level I/O
minor: Write failed
#006: H5Fio.c line 165 in H5F_block_write(): write through page buffer failed
major: Low-level I/O
minor: Write failed
#007: H5PB.c line 1028 in H5PB_write(): write through metadata accumulator failed
major: Page Buffering
minor: Write failed
#008: H5Faccum.c line 826 in H5F__accum_write(): file write failed
major: Low-level I/O
minor: Write failed
#009: H5FDint.c line 258 in H5FD_write(): driver write request failed
major: Virtual File Layer
minor: Write failed
#010: H5FDmpio.c line 1844 in H5FD_mpio_write(): file write failed
major: Low-level I/O
minor: Write failed
Finished write for len=134217729 ...
$ du -hs 13421772*
513M 134217727.h5
513M 134217728.h5
4,0K 134217729.h5
Are we missing anything that needs to be passed in order to write more than 512 MByte from a single rank into a single dataset?
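
P.S. In case it helps with triage: as a fallback we could of course split the transfer into several smaller H5Dwrite calls over hyperslab selections, replacing the single H5Dwrite in the reproducer above. Below is a minimal, untested sketch of that idea; the helper name write_in_pieces and the 64 Mi-element piece size are our own arbitrary assumptions, not anything taken from the HDF5 documentation.

// Hedged sketch, not a verified fix: split one large 1D write into several
// H5Dwrite calls over hyperslab selections, each well below 512 MByte.
static herr_t write_in_pieces(hid_t dset_id, hid_t filespace, hid_t dxpl_id,
                              int const* data, hsize_t len)
{
    hsize_t const piece = 64 * 1024 * 1024; // 64 Mi ints = 256 MByte per call (assumed safe)
    for( hsize_t start = 0; start < len; start += piece )
    {
        hsize_t count = (len - start < piece) ? (len - start) : piece;
        // select this piece of the 1D dataset in the file ...
        if( H5Sselect_hyperslab(filespace, H5S_SELECT_SET,
                                &start, NULL, &count, NULL) < 0 )
            return -1;
        // ... and describe the matching contiguous piece in memory
        hid_t memspace = H5Screate_simple(1, &count, NULL);
        herr_t status = H5Dwrite(dset_id, H5T_NATIVE_INT,
                                 memspace, filespace, dxpl_id, data + start);
        H5Sclose(memspace);
        if( status < 0 )
            return status;
    }
    return 0;
}

With collective transfers every rank would have to issue the same number of H5Dwrite calls, which is one more reason we would prefer to get the single large write working.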