The attached demonstrates a fix for the problem described below.
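The culprit appears to be the dataset-handle array: the original sample declared it with size mpi_rank instead of mpi_size, which under-allocates it (rank 0 gets zero elements), so the collective creation loop writes past the end of the array and corrupts memory. That would match the symptoms of garbage data and only one rank's objects surviving. The attached version sizes the array with mpi_size. A minimal sketch of the pattern, reusing names from the attached sample (common_group, dataspace, xferplist, data1):

std::vector<hid_t> handles(mpi_size);  // sized with mpi_size, not mpi_rank
for (int i = 0; i < mpi_size; ++i) {
    // collective: every rank creates every dataset, in the same order
    std::string name = "rank" + std::to_string(i);
    handles[i] = H5Dcreate(common_group, name.c_str(), H5T_STD_I8LE,
                           dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
}
// each rank writes only its own dataset...
H5Dwrite(handles[mpi_rank], H5T_STD_I8LE, H5S_ALL, H5S_ALL, xferplist, data1);
// ...but all ranks close every handle
for (int i = 0; i < mpi_size; ++i) {
    H5Dclose(handles[i]);
}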
Jarom
From: Nelson, Jarom
Sent: Wednesday, January 06, 2016 2:44 PM
To: 'hdf-forum@lists.hdfgroup.org'
Subject: Parallel HDF5: creation of group/dataset with different names
results in the same group/dataset ID across ranks
I’m developing the HDF5 interfaces for our team’s MPI parallel
application, and I’m having some trouble getting it to work correctly
when each rank creates a different dataset or group in the same file.
I’ve attached a sample program that illustrates my problem.
I expect to get a file that has the following structure:
/ Group
/common\ group Group
/common\ group/rank0 Dataset {1}
/common\ group/rank1 Dataset {1}
/rank0 Group
/rank0/common\ dataset Dataset {1}
/rank1 Group
/rank1/common\ dataset Dataset {1}
But instead, I get a file like this:
/ Group
/common\ group Group
/common\ group/rank0 Dataset {1}
/rank0 Group
/rank0/common\ dataset Dataset {1}
All the data from rank 1 is missing. Similarly, if I run with more ranks,
only the group and dataset for rank0 appear in the file. The data
found within the dataset varies from run to run, and I suspect it is
simply whichever rank happened to write to the file last.
I am printing out the group and dataset hid_t values at creation, and
they are always identical across all ranks. I am not sure whether this
is expected, but it was unexpected to me.
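A way to double-check whether identical IDs actually refer to one shared object would be to compare on-disk addresses rather than handle values, since hid_t handles are process-local; a sketch against the HDF5 1.8/1.10 H5Oget_info API:

H5O_info_t oinfo;
H5Oget_info(dataset_by_rank[mpi_rank], &oinfo);
std::cout << "rank " << mpi_rank << " dataset addr: " << oinfo.addr << std::endl;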
Jarom Nelson
Lawrence Livermore National Lab
National Ignition Facility
Virtual Beam-line Software
7000 East Ave. L-460
Livermore, CA 94550
(925)423-3953
//
// Created by nelson99 on 1/5/16.
//
#include "hdf5.h"
#include <iostream>
#include <string>
#include <assert.h>
#include <mpi.h>
/**
* @brief Test to demonstrate what I believe is a bug in parallel HDF5
*/
int main(int argc, char **argv) {
    try {
        /*
         * MPI variables
         */
        int mpi_size, mpi_rank;
        MPI_Comm comm = MPI_COMM_WORLD;
        MPI_Info info = MPI_INFO_NULL;
        /*
         * Initialize MPI
         */
        MPI_Init(&argc, &argv);
        MPI_Comm_size(comm, &mpi_size);
        MPI_Comm_rank(comm, &mpi_rank);
        std::string outfilename;
        if (mpi_size > 1) {
            outfilename = "h5g_output_parallel.h5";
        } else {
            outfilename = "h5g_output_serial.h5";
        }
        // file access property list: all ranks open the same file via MPI-IO
        hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
        H5Pset_fapl_mpio(plist_id, comm, info);
        hid_t file_id = H5Fcreate(outfilename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
        H5Pclose(plist_id);
        hsize_t dims[1] = {1};
        hid_t datatype = H5T_STD_I8LE;
        // per-rank test values, offset so each rank's contribution is identifiable
        std::int8_t data1[1] = {(std::int8_t) (mpi_rank + 100)};
        std::int8_t data2[1] = {(std::int8_t) (mpi_rank - 100)};
        // hid_t datatype = H5T_STD_I32LE;
        // std::int32_t data[1] = {mpi_rank};
        // dataspace is the same for all the datasets below
        hid_t dataspace = H5Screate_simple(1, dims, dims);
        // create a common group to contain distinct datasets for each rank
        hid_t common_group = H5Gcreate(file_id, "common group", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
        std::cout << "rank " << mpi_rank << ": /common group/ ID: "
                  << common_group << std::endl;
        // do collective calls to create all the distinct datasets for each rank
        // (each rank must create each dataset)
        // NOTE: sized with mpi_size; the original version sized this with
        // mpi_rank, which under-allocates (zero elements on rank 0) and
        // corrupts memory in the loop below
        std::vector<hid_t> dataset_by_rank(mpi_size);
        for (int i = 0; i < mpi_size; ++i) {
            std::string rank_name = "rank";
            rank_name += std::to_string(i);
            std::cout << rank_name << std::endl;
            dataset_by_rank[i] = H5Dcreate(common_group, rank_name.c_str(), datatype,
                                           dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
            std::cout << "rank " << mpi_rank << " /common group/" << rank_name << " ID: "
                      << dataset_by_rank[i] << std::endl;
        }
        // set up dataset transfer property list for collective MPI I/O
        hid_t xferplist = H5Pcreate(H5P_DATASET_XFER);
        // H5Pset_dxpl_mpio(xferplist, H5FD_MPIO_INDEPENDENT);
        H5Pset_dxpl_mpio(xferplist, H5FD_MPIO_COLLECTIVE);
        // each rank writes its own value to the corresponding dataset for that rank
        H5Dwrite(dataset_by_rank[mpi_rank], datatype, H5S_ALL, H5S_ALL, xferplist, data1);
        // collective calls to close each dataset
        for (int i = 0; i < mpi_size; ++i) {
            H5Dclose(dataset_by_rank[i]);
        }
        H5Gclose(common_group);
        // do collective calls to create all the groups for every rank
        // (each rank must create each group, and each dataset within each group)
        std::vector<hid_t> group_by_rank(mpi_size);
        for (int i = 0; i < mpi_size; ++i) {
            std::string rank_name = "rank";
            rank_name += std::to_string(i);
            std::cout << rank_name << std::endl;
            group_by_rank[i] = H5Gcreate(file_id, rank_name.c_str(),
                                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
            std::cout << "rank " << mpi_rank << " /" << rank_name << "/ ID: "
                      << group_by_rank[i] << std::endl;
            dataset_by_rank[i] = H5Dcreate(group_by_rank[i], "common dataset", datatype,
                                           dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
            std::cout << "rank " << mpi_rank << " /" << rank_name << "/common dataset ID: "
                      << dataset_by_rank[i] << std::endl;
        }
        // each rank then writes data to the dataset in its own group
        H5Dwrite(dataset_by_rank[mpi_rank], datatype, H5S_ALL, H5S_ALL, xferplist, data2);
        H5Pclose(xferplist);
        H5Sclose(dataspace);
        // close everything collectively before closing the file
        for (int i = 0; i < mpi_size; ++i) {
            H5Dclose(dataset_by_rank[i]);
            H5Gclose(group_by_rank[i]);
        }
        H5Fclose(file_id);
        MPI_Finalize();
    } catch (std::exception &e) {
        std::cerr << "std::exception thrown: " << e.what() << std::endl;
        return -1;
    } catch (int e) {
        std::cerr << "Unrecognized error thrown: " << e << std::endl;
        return e ? e : -1;
    }
    return 0;
}
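For reference, the sample is built with the MPI compiler wrapper against a parallel HDF5 build (for example, mpicxx with -lhdf5; exact flags depend on the local install) and run under mpirun with two or more ranks.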