VDS: Unable to create a VDS linking large number of datasets

Hi,

I have 14,430 HDF5 files. Each of these files contains a dataset of datatype float with size 49152 x 21 x 1. I am trying to create a VDS of size 49152 x 21 x 14430 that maps the dataset from each of the 14,430 files. The code runs without errors, and I can see that it creates a VDS of size 49152 x 21 x 14430 and writes it to an HDF5 file. However, if I open the VDS or run h5dump on it, the VDS contains the right data only up to 49152 x 21 x 1024; the rest of the values are zeros. It looks like only the first 1024 files are mapped and the rest are not. But if I run "h5dump -p" on the HDF5 file containing the VDS, it shows that all 14,430 files are virtually mapped. Is there a limit on the memory or on the number of files that can be mapped that I am not aware of? Am I missing something? I am attaching the code below for reference. I am using HDF5 1.10.3. Thanks in advance.

#include <assert.h>
#include <errno.h>
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <dirent.h>
#include <sys/stat.h>
#include <unistd.h>

#include "hdf5.h"

/* Floating-point element type and the matching HDF5 native type id.
 * MPI_RL names the corresponding MPI datatype; no MPI header is included
 * in this file, so it presumably serves a parallel variant -- TODO confirm. */
#define REAL float
#define H5T_NATIVE_RL H5T_NATIVE_FLOAT
#define MPI_RL MPI_FLOAT

/* RANK: number of dimensions for 2-D arrays.
 * PAR_RANK: number of dimensions of the particle dataspaces. */
#define RANK 2
#define PAR_RANK 3
/* LENLINE: size in bytes of the file-name and dataset-name buffers. */
#define LENLINE 4096
/* NOTE(review): the remaining constants are not referenced by the code shown
 * here; their meaning is presumably defined by the wider project -- confirm
 * before relying on them. */
#define MAXLEN 256
#define DOMAINDATA 6
#define GRIDDATA 6
#define MAXFILE 20000

int main(int argc, char **argv)
{

double starttime, endtime;
double tempTS, tempTE;
time_t start, stop;
int ii, iProc;
int i,j,m, colorTag;
int pCount;
double tempR;
int iStartProc, iEndProc;
int nProcs, parInitID, my_id;
long int parID, parNum;
int nproc=0,iStartFrame=0,iEndFrame=0,iFrameIncr=1, iFrame;
REAL rdata;
char *rundir, *probName, fileName[LENLINE], hdf5FileName[LENLINE], parDSetName[LENLINE];
char absPATH[256];
int frameIndex;
int totalParCount;

int have_time = time(&start); /* Is current calendar time (UTC) available? */

/* HDF5 APIs definitions /
hid_t file_id; /
file and dataset identifiers /
hid_t plist_id; /
property list identifier( access template) /
hsize_t dims;
herr_t status;
hid_t dataspace_id;
hid_t memspace_id;
hid_t attribute_id;
hsize_t count[PAR_RANK]; /
size of subset in the file /
hsize_t offset[PAR_RANK]; /
subset offset in the file /
hsize_t stride[PAR_RANK];
hsize_t block[PAR_RANK];
hid_t gridGrp_id;
hid_t domainGrp_id;
hid_t timeGrp_id;
hid_t parGrp_id;
hid_t gridDSet_id;
hid_t domainDSet_id;
hid_t runTimeDSet_id;
hid_t timeStepDSet_id;
hid_t parDSet_id;
hid_t filespace; /
file and memory dataspace identifiers /
hsize_t pDims[PAR_RANK]; /
dataset dimensions */
hsize_t Dims[RANK];

/* Read Arguments /
for (i=1; i<argc; i++) {
/
If argv[i] is a 2 character string of the form β€œ-?” then: /
if(argv[i] == β€˜-’ && (argv[i]+1) != β€˜\0’ && (argv[i]+2) == β€˜\0’){
switch(
(argv[i]+1)) {
case β€˜n’: /
-n /
nproc = atoi(argv[++i]);
break;
case β€˜p’: /
-p /
probName = argv[++i];
break;
case β€˜f’: /
-f <# range(iStartFrame:iEndFrame:iFrameIncr)>
/
sscanf(argv[++i],"%d:%d:%d",&iStartFrame,&iEndFrame,&iFrameIncr);
if (iEndFrame == 0) iEndFrame = iStartFrame;
break;
case β€˜d’: /
-d /
rundir = argv[++i];
break;
case β€˜h’: /
-h /
usage(argv[0]);
break;
case β€˜m’: /
-h */
mode = atoi(argv[++i]);
break;
default:
usage(argv[0]);
break;
}
}
}

/* Construct absolute path of the run directory and change dir. */
realpath(rundir, absPATH);
chdir(absPATH);

my_id = 0;



totalParCount = 16384*3;

/* ----------------------------------------------------------------------- /
/
Perform virtual mapping across the frames to create VDS */

hid_t       vdcpl_id; 
hid_t       vds_space_id; 
hid_t       src_dataspace_id;
hid_t       vdset_id; 
hsize_t     vdims[PAR_RANK];
hsize_t     src_dims[PAR_RANK];
hsize_t     voffset[PAR_RANK];
hsize_t     vcount[PAR_RANK];
hsize_t     vstride[PAR_RANK];
hsize_t     vblock[PAR_RANK];


vdcpl_id = H5Pcreate(H5P_DATASET_CREATE);

vdims[0] = totalParCount;
vdims[1] = 21;
vdims[2] = iEndFrame-iStartFrame+1;

vds_space_id = H5Screate_simple(PAR_RANK, vdims, NULL);

offset[0] = 0;
offset[1] = 0;
offset[2] = 0;

count[0]  = 1;
count[1]  = 1;  
count[2]  = 1;

stride[0] = 1;
stride[1] = 1;
stride[2] = 1;

block[0]    = totalParCount;
block[1]    = 21;
block[2]    = 1;

src_dims[2] = 1;
src_dims[1] = 21;
src_dims[0] = totalParCount;



for(i=iStartFrame; i<=iEndFrame; i++){

    frameIndex = i - iStartFrame;

    voffset[0] = 0;
    voffset[1] = 0;
    voffset[2] = frameIndex;

    vstride[0] = 1;
    vstride[1] = 1;
    vstride[2] = 1;

    vblock[0]    = totalParCount;
    vblock[1]    = 21;
    vblock[2]    = 1;
    
    vcount[0]  = 1;
    vcount[1]  = 1;  
    vcount[2]  = 1;

    src_dataspace_id = H5Screate_simple(PAR_RANK, src_dims, NULL);

    H5Sselect_hyperslab(src_dataspace_id, H5S_SELECT_SET, offset, stride, block, count);

    H5Sselect_hyperslab(vds_space_id, H5S_SELECT_SET, voffset, vstride, vblock, vcount);

    sprintf(hdf5FileName, "ParData.%s.%d.h5",probName,frameIndex);

    sprintf(parDSetName, "/Particles/particle_%d", frameIndex );

    status = H5Pset_virtual(vdcpl_id, vds_space_id, hdf5FileName, parDSetName, src_dataspace_id);

    if(status < 0){
        printf("Proc:%04d ************** Failed creating virtual mapping %s ****************\n\n",
                    my_id, hdf5FileName);
        return 1;
    }

    printf("Proc:%04d ************** Creating virtual mapping %s ****************\n\n",
                my_id, hdf5FileName);

    H5Sclose(src_dataspace_id);

}

sprintf(hdf5FileName, "ParData.%s.0.h5",probName);

    
file_id = H5Fopen(hdf5FileName, H5F_ACC_RDWR, H5P_DEFAULT);


/* Create VDS in the 0th frame HDF5 file inside the Particles Grp. */
parGrp_id = H5Gopen1(file_id, "/Particles");

vdset_id = H5Dcreate(parGrp_id, "VDS", H5T_NATIVE_RL, vds_space_id, H5P_DEFAULT,
                        vdcpl_id, H5P_DEFAULT);

H5Sclose(vds_space_id);
H5Gclose(parGrp_id);
H5Dclose(vdset_id);
H5Pclose(vdcpl_id);
H5Fclose(file_id);


if(my_id == 0)
    printf("\n\nProc:%04d ************** Finished ****************\n", my_id);


return 1;

}

Hi, you may be running into the operating system's limit on how many files a process can keep open at the same time. Which OS are you using? On Linux this limit can be configured via ulimit, see e.g.:

Under Windows, the POSIX layer used by the sec2 file I/O driver is limited to a mere 2048 open files, with no way around it. However, when using native Windows API functions the number of open files is unlimited. So it may be an OS limitation that is affecting your case.

1 Like

Thank you for your prompt response. Yes, it is the ulimit of the Linux OS that was causing this issue. I increased the limit and the VDS mapping works fine now. Thanks for your help.