Hi,
I think I've come across a performance issue with H5Dread when reading
non-contiguous hyperslab selections. The use case in my software is a bit
complicated, so instead I came up with a small example that shows the same
issue. Please let me know if I'm missing something here, it's possible
that a different approach could be much better.
In my example I write a 2D native int chunked dataset to an HDF5 file
(adapted from the h5_extend example, now writes a 229 MB file). I then
construct a hyperslab selection of the dataset and read it back using a
single call to H5Dread. When I use a stride of 1 (so all elements of the
selection are contiguous) the read is very fast. However, when I set the
stride to 2 the read time slows down significantly, on the order of 15
times slower.
The dataset has a chunk shape of 1000x500, and the 0th dimension is the one
being tested with a stride of 1 and 2. Is this a typical slowdown seen
with a stride of 2? If the chunksize is 1000, then a stride of 1 and 2
would still need to read the same amount of data, so I would expect similar
performance.
I've run the stride of 2 scenario under Valgrind (using the callgrind tool)
for profiling and it shows that 95% of the time is being spent in
H5S_select_iterate (I can share the callgrind output if it helps), which is
making this program CPU-bound rather than I/O-bound.
I'm using an up-to-date version of the HDF5 trunk, checked out from
subversion. I looked at the callback H5D__chunk_io_init() used by
H5S_select_iterate(). I noticed that there are two different approaches
taken, one for the case where the shape of the memory space is the same as
the dataspace, and another if the shapes are different. The performance
drop I've noticed appears to be for the latter case.
Any ideas on how to optimize this function or otherwise increase the
performance of this use case?
Thanks,
Chris LeBlanc
···
--
Here is the example code. I wrote this mail earlier and included it as an
attachment and haven't seen it appear on the mailing list so I'm trying
again with the text inline:
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* *
* Copyright by The HDF Group.
*
* Copyright by the Board of Trustees of the University of Illinois.
*
* All rights reserved.
*
*
*
* This file is part of HDF5. The full HDF5 copyright notice, including
*
* terms governing use, modification, and redistribution, is contained in
*
* the files COPYING and Copyright.html. COPYING can be found at the root
*
* of the source code distribution tree; Copyright.html can be found at the
*
* root level of an installed copy of the electronic HDF5 document set and
*
* is linked from the top-level documents page. It can also be found at
*
* http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have
*
* access to either file, you may request a copy from help@hdfgroup.org.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* */
/*
* This example shows how to work with extendible datasets. The dataset
* must be chunked in order to be extendible.
*
* It is used in the HDF5 Tutorial.
*/
/* Modified example of h5_extend.c to show the performance difference between
 * reading with a stride of 1 vs 2: */
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include "hdf5.h"
#define FILE "extend.h5"
#define DATASETNAME "ExtendibleArray"
#define RANK 2
/*
 * Create the HDF5 file FILE (truncating any existing file) and write a
 * zero-filled, chunked 2D native-int dataset named DATASETNAME into it.
 *
 * The dataset is created with dimensions 20000 x 3000, unlimited maximum
 * dimensions and a chunk shape of 1000 x 500.
 *
 * On failure the name of the failing call is printed to stderr; all
 * resources acquired up to that point are released before returning.
 */
void write_file(void) {
    hsize_t dims[2] = {20000, 3000};      /* dataset dimensions at creation time */
    hsize_t maxdims[2] = {H5S_UNLIMITED, H5S_UNLIMITED};
    hsize_t chunk_dims[2] = {1000, 500};
    hid_t file = -1;                      /* handles; negative means "not open" */
    hid_t dataspace = -1;
    hid_t cparms = -1;
    hid_t dataset = -1;
    const char *failed = NULL;            /* name of the first call that failed */
    int *data = calloc((size_t)(dims[0] * dims[1]), sizeof *data);

    if (data == NULL) {
        failed = "calloc";
        goto done;
    }

    /* Create the data space with unlimited dimensions. */
    dataspace = H5Screate_simple(RANK, dims, maxdims);
    if (dataspace < 0) {
        failed = "H5Screate_simple";
        goto done;
    }

    /* Create a new file. If the file exists its contents will be overwritten. */
    file = H5Fcreate(FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
    if (file < 0) {
        failed = "H5Fcreate";
        goto done;
    }

    /* Modify dataset creation properties, i.e. enable chunking. */
    cparms = H5Pcreate(H5P_DATASET_CREATE);
    if (cparms < 0) {
        failed = "H5Pcreate";
        goto done;
    }
    if (H5Pset_chunk(cparms, RANK, chunk_dims) < 0) {
        failed = "H5Pset_chunk";
        goto done;
    }

    /* Create a new dataset within the file using cparms creation properties. */
    dataset = H5Dcreate2(file, DATASETNAME, H5T_NATIVE_INT, dataspace,
                         H5P_DEFAULT, cparms, H5P_DEFAULT);
    if (dataset < 0) {
        failed = "H5Dcreate2";
        goto done;
    }

    /* Write data to the dataset. */
    if (H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL,
                 H5P_DEFAULT, data) < 0) {
        failed = "H5Dwrite";
        goto done;
    }

done:
    /* Close resources; only handles that were actually opened are closed. */
    if (dataset >= 0 && H5Dclose(dataset) < 0 && failed == NULL) {
        failed = "H5Dclose";
    }
    if (cparms >= 0 && H5Pclose(cparms) < 0 && failed == NULL) {
        failed = "H5Pclose";
    }
    if (dataspace >= 0 && H5Sclose(dataspace) < 0 && failed == NULL) {
        failed = "H5Sclose";
    }
    if (file >= 0 && H5Fclose(file) < 0 && failed == NULL) {
        failed = "H5Fclose";
    }
    free(data);

    if (failed != NULL) {
        fprintf(stderr, "write_file: %s failed\n", failed);
    }
}
/*
 * Open DATASETNAME in FILE, select a strided hyperslab of it starting at
 * (0, 0), read the selection with a single H5Dread call and print the
 * elapsed wall-clock time in whole seconds.
 *
 * dim1_stride: hyperslab stride along dimension 0 (must be non-zero).
 * dim2_stride: hyperslab stride along dimension 1 (must be non-zero).
 *
 * The number of elements selected along each dimension is the dataset
 * extent divided by the stride (integer division); the memory space has
 * exactly that shape.
 *
 * On failure the name of the failing step is printed to stderr; all
 * resources acquired up to that point are released before returning.
 */
void read_file(hsize_t dim1_stride, hsize_t dim2_stride) {
    hsize_t start[2] = {0, 0};
    hsize_t strides[2] = {dim1_stride, dim2_stride};
    hsize_t dimsr[2];
    hsize_t count[2];
    hsize_t memspace_dims[2];
    hid_t file = -1;                      /* handles; negative means "not open" */
    hid_t dataset = -1;
    hid_t filespace = -1;
    hid_t memspace = -1;
    int *datar = NULL;
    const char *failed = NULL;            /* name of the first step that failed */
    time_t t1, t2;

    /* A zero stride would divide by zero below. */
    if (dim1_stride == 0 || dim2_stride == 0) {
        fprintf(stderr, "read_file: stride must be non-zero\n");
        return;
    }

    file = H5Fopen(FILE, H5F_ACC_RDONLY, H5P_DEFAULT);
    if (file < 0) {
        failed = "H5Fopen";
        goto done;
    }
    dataset = H5Dopen2(file, DATASETNAME, H5P_DEFAULT);
    if (dataset < 0) {
        failed = "H5Dopen2";
        goto done;
    }
    filespace = H5Dget_space(dataset);
    if (filespace < 0) {
        failed = "H5Dget_space";
        goto done;
    }

    /* dimsr only has room for RANK entries, so verify the rank first. */
    if (H5Sget_simple_extent_ndims(filespace) != RANK) {
        failed = "dataset rank check";
        goto done;
    }
    if (H5Sget_simple_extent_dims(filespace, dimsr, NULL) < 0) {
        failed = "H5Sget_simple_extent_dims";
        goto done;
    }

    /* Apply each stride to its own dimension so the selection stays
     * inside the dataset extent. */
    count[0] = dimsr[0] / strides[0];
    count[1] = dimsr[1] / strides[1];
    if (count[0] == 0 || count[1] == 0) {
        failed = "selection size check (stride larger than extent)";
        goto done;
    }
    memspace_dims[0] = count[0];
    memspace_dims[1] = count[1];

    memspace = H5Screate_simple(RANK, memspace_dims, NULL);
    if (memspace < 0) {
        failed = "H5Screate_simple";
        goto done;
    }

    /* Core of this test: a hyperslab with varying stride. */
    if (H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, strides,
                            count, NULL) < 0) {
        failed = "H5Sselect_hyperslab";
        goto done;
    }

    datar = calloc((size_t)(memspace_dims[0] * memspace_dims[1]), sizeof *datar);
    if (datar == NULL) {
        failed = "calloc";
        goto done;
    }

    printf("reading with stride = %d, memspace_dims: %d %d, count: %d %d\n",
           (int) strides[0], (int) memspace_dims[0], (int) memspace_dims[1],
           (int) count[0], (int) count[1]);

    t1 = time(NULL);
    if (H5Dread(dataset, H5T_NATIVE_INT, memspace, filespace,
                H5P_DEFAULT, datar) < 0) {
        failed = "H5Dread";
        goto done;
    }
    t2 = time(NULL);

    /* Report the same stride as the "reading with" line above. */
    printf("done reading with stride = %d, time = %d (nearest sec)\n",
           (int) strides[0], (int) (t2 - t1));

done:
    /* Close resources; only handles that were actually opened are closed. */
    if (memspace >= 0 && H5Sclose(memspace) < 0 && failed == NULL) {
        failed = "H5Sclose(memspace)";
    }
    if (filespace >= 0 && H5Sclose(filespace) < 0 && failed == NULL) {
        failed = "H5Sclose(filespace)";
    }
    if (dataset >= 0 && H5Dclose(dataset) < 0 && failed == NULL) {
        failed = "H5Dclose";
    }
    if (file >= 0 && H5Fclose(file) < 0 && failed == NULL) {
        failed = "H5Fclose";
    }
    free(datar);

    if (failed != NULL) {
        fprintf(stderr, "read_file: %s failed\n", failed);
    }
}
int main (void)
{
write_file();
read_file(1, 1);
read_file(2, 1);
}