Hi,
I am experiencing an unusual problem while reading a very large number of
small files (38MB each) in a loop. The H5Fopen call, which takes almost no time
(<1 millisecond) in the beginning, suddenly increases to an average of 10
milliseconds after a few hundred files (~700) have been read. I've included code
below that reads some 1000 files, each around 38MB in size, and loads that
data into memory. After reading some hundreds of files, the H5Fopen call
consistently takes longer. NOTE: all files are identical.
I am using HDF5 version 1.8.11, running on Windows 8.1.
-------------------- CODE START -----------------------
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <windows.h>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>
#include <hdf5.h>
// Short alias for the Boost.Filesystem namespace used by main() to walk the input directory.
namespace fs = boost::filesystem;
// Name of the dataset that is opened in every input file.
#define DATASETNAME "Data"
// Extents of the 2-D memory dataspace used for the read: main() sets
// dims[0] = NY and dims[1] = NX, and sizes its buffer as (NX+1)*(NY+1) floats.
#define NX 100
#define NY 100000
int main(int argc, char* argv[])
{
float* data = (float *) malloc((NX+1)*(NY+1) * sizeof(float));
long int before = GetTickCount();
std::size_t i = 0;
int f = 0;
fs::path someDir("D:\\TICKET8_test_data\\testdataset\\random\\gen2");
fs::directory_iterator end_iter;
boolean done = false;
if ( fs::exists(someDir) && fs::is_directory(someDir))
{
for( fs::directory_iterator dir_iter(someDir) ; dir_iter != end_iter ;
++dir_iter)
{
std::string filePath = dir_iter->path().string();
std::string ext = fs::extension(dir_iter->path());
if (ext == ".mcd" && fs::is_regular_file(dir_iter->status()) )
{
clock_t start_time, end_time, loop_start, loop_end;
clock_t diff_timeL, diff_time1, diff_time2, diff_time3, diff_time4;
const char* cStringPath = filePath.c_str();
start_time = clock();
hid_t fapl_id = H5Pcreate( H5P_FILE_ACCESS );
end_time = clock();
diff_time1 = end_time - start_time;
start_time = clock();
H5Pset_fclose_degree( fapl_id, H5F_CLOSE_STRONG );
end_time = clock();
diff_time2 = end_time - start_time;
start_time = clock();
hid_t acs_hdfFile = H5Fopen( cStringPath, H5F_ACC_RDONLY, fapl_id );
end_time = clock();
diff_time3 = end_time - start_time;
std::cout << "------" << std::endl;
hid_t dataset = H5Dopen(acs_hdfFile, DATASETNAME, H5P_DEFAULT);
hsize_t dims[2];
dims[0] = NY;
dims[1] = NX;
hid_t filespace = H5Dget_space(dataset);
hid_t memspace = H5Screate_simple(2, dims, NULL);
start_time = clock();
herr_t status = H5Dread(dataset, H5T_NATIVE_INT, memspace,
filespace, H5P_DEFAULT, data);
end_time = clock();
diff_time4 = end_time - start_time;
std::cout << filePath << " " << diff_time1 << " " << diff_time2 <<
" " << diff_time3 << " " << diff_time4 << std::endl;
H5Dclose(dataset);
H5Sclose(filespace);
H5Sclose(memspace);
H5Fclose(acs_hdfFile);
f++;
}
}
}
free(data);
long int after = GetTickCount();
std::cout << "TOTAL TIME to process " << f << " files " << (after -
before)/60000 << " minutes.";
}
-------------------- CODE END --------------------------