Summary
When creating a selection using H5Sselect_hyperslab
from N
slabs, we observe that H5Sselect_hyperslab
has runtime that’s linear in the number of slabs already added. Hence the overall runtime is quadratic in the number of slabs.
This happens naturally in our I/O workloads. We commonly have indirect access, i.e. when we want to read the value of x
we first look up the ranges of indices [index_range(k) for k in range(N)]
and then load the indicated value from the dataset x
, e.g. by using a hyperslab and OR’ing the index ranges into a single selection.
Reproducer
Here’s a simple reproducer:
// file: linear.cpp
#include <hdf5.h>
#include <chrono>
#include <iostream>
hid_t select_none(hsize_t * dims) {
auto space_id = H5Screate_simple(2, dims, nullptr);
H5Sselect_none(space_id);
return space_id;
}
hid_t select(hsize_t * dims, size_t n_slabs) {
hid_t space_id = select_none(dims);
for(size_t i = 0; i < n_slabs; ++i) {
hsize_t offset[2] = {i * 3, 0};
hsize_t count[2] = {1, 4};
H5Sselect_hyperslab(space_id, H5S_SELECT_OR, offset, nullptr, count, nullptr);
}
return space_id;
}
int main() {
size_t n = 100000000, m = 20;
hsize_t dims[] = {n, m};
size_t nruns = 16;
for(size_t k = 10; k < nruns; ++k) {
size_t nsel = 1 << k;
auto t1 = std::chrono::steady_clock::now();
auto space_id = select(dims, nsel);
auto t2 = std::chrono::steady_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count() / 1e3;
std::cout << nsel << ", " << elapsed << "\n";
H5Sclose(space_id);
}
return 0;
}
which we compile and run as follows:
$ g++ -O2 -g linear.cpp -o linear -lhdf5 && ./linear
1024, 0.008
2048, 0.03
4096, 0.166
8192, 0.935
16384, 4.92
32768, 22.448
The first column contains the number of hyperslabs that are being OR’ed together and the second the runtime in seconds. Clearly, the runtime is quadratic, if not slightly worse.
Workaround
We noticed that H5Scombine_select
is not quadratic in the number of selected slabs. Therefore, one can use the fact that the union of sets is associative and use H5Scombine_select
to recursively combine the first and second half of the hyperslabs. Here’s the code to do so:
// file: divide-and-conquer.cpp
#include <hdf5.h>
#include <chrono>
#include <iostream>
hid_t select_none(hsize_t * dims) {
auto space_id = H5Screate_simple(2, dims, nullptr);
H5Sselect_none(space_id);
return space_id;
}
hid_t select(hsize_t * dims, const std::pair<size_t, size_t>& range) {
auto [i_min, i_max] = range;
if(i_min == i_max) {
return select_none(dims);
}
if(i_max - i_min == 1) {
auto space_id = select_none(dims);
hsize_t offset[2] = {i_min * 3, 0};
hsize_t count[2] = {1, 4};
H5Sselect_hyperslab(space_id, H5S_SELECT_OR, offset, nullptr, count, nullptr);
return space_id;
}
size_t piv = i_min + (i_max - i_min) / 2;
auto left_space_id = select(dims, {i_min, piv});
auto right_space_id = select(dims, {piv, i_max});
auto space_id = H5Scombine_select(left_space_id, H5S_SELECT_OR, right_space_id);
H5Sclose(left_space_id);
H5Sclose(right_space_id);
return space_id;
}
int main() {
size_t n = 100000000, m = 20;
hsize_t dims[] = {n, m};
size_t nruns = 16;
for(size_t k = 10; k < nruns; ++k) {
size_t nsel = 1 << k;
auto t1 = std::chrono::steady_clock::now();
auto space_id = select(dims, {0, nsel});
auto t2 = std::chrono::steady_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count() / 1e3;
std::cout << nsel << ", " << elapsed << "\n";
H5Sclose(space_id);
}
return 0;
}
We again compile and run as follows:
$ g++ -O2 -g divide-and-conquer.cpp -o divide-and-conquer -lhdf5 && ./divide-and-conquer
1024, 0.001
2048, 0.001
4096, 0.003
8192, 0.008
16384, 0.017
32768, 0.035
Related Topics:
There’s several (very old) related topics, but none of them seems to acknowledge the issue, or provide a workaround. For example: