CMakeLists.txt for HDF5 Shared C Multithreaded Library

Hello all, I’m attempting to use CMakeLists to build the HDF5 library in a manner that will allow me to allow multiple threads of my program to write to their own specific .h5 files. Each H5 file is only ever accessed by its specific thread, so I believe this should be thread safe. I’m new to CMakeLists, and I’d appreciate if anyone can give me assistance on how to use it to build HDF5 for multithreading use.

See below my CMakeList code with the debugging and error messages that I get when building from local installation and attempting to build from source.

===

cmake_minimum_required(VERSION 3.12)
set(CMAKE_VERBOSE_MAKEFILE ON)
include(FetchContent)
project(farfield)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

set(RAYLIB_VERSION 5.0)
set(raylib_VERBOSE 1)
find_package(raylib ${RAYLIB_VERSION} QUIET) # QUIET or REQUIRED
if (NOT raylib_FOUND) # If there’s none, fetch and build raylib
FetchContent_Declare(
raylib
DOWNLOAD_EXTRACT_TIMESTAMP OFF
URL https://github.com/raysan5/raylib/archive/refs/tags/${RAYLIB_VERSION}.tar.gz
)
FetchContent_GetProperties(raylib)
if (NOT raylib_POPULATED) # Have we downloaded raylib yet?
set(FETCHCONTENT_QUIET NO)
FetchContent_Populate(raylib)
set(BUILD_EXAMPLES OFF CACHE BOOL “” FORCE) # don’t build the supplied examples
set(BUILD_TESTING OFF CACHE BOOL “” FORCE)
add_subdirectory(${raylib_SOURCE_DIR} ${raylib_BINARY_DIR})
endif()
endif()

Select HDF5 build strategy

set(HDF5_BUILD_FROM_SOURCE ON CACHE BOOL “” FORCE)

HDF5 configuration flags

set(BUILD_TESTING OFF CACHE BOOL “” FORCE)
set(BUILD_SHARED_LIBS ON CACHE BOOL “Build shared libraries” FORCE)
set(BUILD_STATIC_LIBS OFF CACHE BOOL “Build static libraries” FORCE)
set(HDF5_ENABLE_THREADSAFE ON CACHE BOOL “” FORCE)
set(HDF5_BUILD_HL_LIB OFF CACHE BOOL “” FORCE) # Disable high-level APIs for thread safety
set(HDF5_BUILD_EXAMPLES OFF CACHE BOOL “” FORCE) # Disable examples
set(HDF5_BUILD_TOOLS OFF CACHE BOOL “” FORCE) # Disable tools

HDF5 build

if (HDF5_BUILD_FROM_SOURCE)
FetchContent_Declare(
hdf5
GIT_REPOSITORY GitHub - HDFGroup/hdf5: Official HDF5® Library Repository
GIT_TAG hdf5_1.14.5
)
FetchContent_MakeAvailable(hdf5)
set (LINK_LIBS ${LINK_LIBS} hdf5-shared)
set (HDF5_INCLUDE_DIRS ${hdf5_BINARY_DIR}/src)
else()
# Load pre-installed HDF5 package
set (LIB_TYPE SHARED) # STATIC or SHARED
string(TOLOWER ${LIB_TYPE} SEARCH_TYPE)
find_package(HDF5 NAMES hdf5 COMPONENTS C ${SEARCH_TYPE})
set (LINK_LIBS ${LINK_LIBS} ${HDF5_C_${LIB_TYPE}_LIBRARY})
endif()

HDF5 package information

message(STATUS “hdf5_POPULATED: ${hdf5_POPULATED}”)
message(STATUS “hdf5_BINARY_DIR: ${hdf5_BINARY_DIR}”)
message(STATUS “hdf5_SOURCE_DIR: ${hdf5_SOURCE_DIR}”)
message(STATUS “HDF5 Found: ${HDF5_FOUND}”)
message(STATUS “HDF5_VERSION: ${HDF5_VERSION}”)
message(STATUS “HDF5_INCLUDE_DIRS: ${HDF5_INCLUDE_DIRS}”)
message(STATUS “HDF5_LIBRARIES: ${HDF5_LIBRARIES}”)
message(STATUS “HDF5_DEFINITIONS: ${HDF5_DEFINITIONS}”)
message(STATUS “HDF5 include: ${HDF5_INCLUDE_DIR}”)
message(STATUS “HDF5 library: ${LINK_LIBS}”)

Other build code

file(GLOB_RECURSE SOURCES “src/*.cpp”)
message(STATUS “Cpp file sources:”)
foreach(SOURCE ${SOURCES})
message(STATUS “- ${SOURCE}”)
endforeach()
add_executable(${PROJECT_NAME} ${SOURCES})
add_subdirectory(src)
set_target_properties(${PROJECT_NAME} PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${PROJECT_NAME})
set_property(TARGET ${PROJECT_NAME} PROPERTY VS_DEBUGGER_WORKING_DIRECTORY $<TARGET_FILE_DIR:${PROJECT_NAME}>)
if (“${PLATFORM}” STREQUAL “Web”)
add_custom_command(
TARGET ${PROJECT_NAME} PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/src/resources $<TARGET_FILE_DIR:${PROJECT_NAME}>/…/resources
)
#DEPENDS ${PROJECT_NAME}
else()
add_custom_command(
TARGET ${PROJECT_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/src/resources $<TARGET_FILE_DIR:${PROJECT_NAME}>/resources
)
#DEPENDS ${PROJECT_NAME}
endif()

Linking and include directories

target_link_libraries(${PROJECT_NAME} raylib)
target_link_libraries (${PROJECT_NAME} ${LINK_LIBS}) # Link HDF5 libraries
target_include_directories(${PROJECT_NAME} PRIVATE ${HDF5_INCLUDE_DIRS}) # Include HDF5 headers

if (“${PLATFORM}” STREQUAL “Web”)
# Tell Emscripten to build an example.html file.
set_target_properties(${PROJECT_NAME} PROPERTIES SUFFIX “.html”)
target_link_options(${PROJECT_NAME} PUBLIC -sUSE_GLFW=3 PUBLIC --preload-file resources)
endif()
if (APPLE)
target_link_libraries(${PROJECT_NAME} “-framework IOKit”)
target_link_libraries(${PROJECT_NAME} “-framework Cocoa”)
target_link_libraries(${PROJECT_NAME} “-framework OpenGL”)
endif()

===

BUILDING FROM LOCAL INSTALLATION - BUILDS AND RUNS, BUT CANNOT MULTITHREAD

When I set HDF5_BUILD_FROM_SOURCE to OFF, then it uses the local installation on my Windows machine, built using the .msi installer from HDF5 website. The program launches successfully, and all single threaded H5DF functions work normally, however when I try the same commands in a second thread, I get errors like this:

HDF5-DIAG: Error detected in HDF5 (1.14.5):
#000: D:\a\hdf5\hdf5\hdf5-1.14.5\src\H5D.c line 186 in H5Dcreate2(): unable to synchronously create dataset
major: Dataset
minor: Unable to create file
#001: D:\a\hdf5\hdf5\hdf5-1.14.5\src\H5D.c line 135 in H5D__create_api_common(): unable to create dataset
major: Dataset
minor: Unable to create file
#002: D:\a\hdf5\hdf5\hdf5-1.14.5\src\H5VLcallback.c line 1870 in H5VL_dataset_create(): can’t reset VOL wrapper info
major: Virtual Object Layer
minor: Can’t reset object
#003: D:\a\hdf5\hdf5\hdf5-1.14.5\src\H5VLint.c line 2406 in H5VL_reset_vol_wrapper(): no VOL object wrap context?
major: Virtual Object Layer
minor: Bad value
#004: D:\a\hdf5\hdf5\hdf5-1.14.5\src\H5VLcallback.c line 1865 in H5VL_dataset_create(): dataset create failed
major: Virtual Object Layer
minor: Unable to create file
#005: D:\a\hdf5\hdf5\hdf5-1.14.5\src\H5VLcallback.c line 1830 in H5VL__dataset_create(): dataset create failed
major: Virtual Object Layer

Here’s the output from the CMake build

[cmake] – hdf5_POPULATED:
[cmake] – hdf5_BINARY_DIR:
[cmake] – hdf5_SOURCE_DIR:
[cmake] – HDF5 Found: 1
[cmake] – HDF5_VERSION: 1.14.5
[cmake] – HDF5_INCLUDE_DIRS:
[cmake] – HDF5_LIBRARIES:
[cmake] – HDF5_DEFINITIONS:
[cmake] – HDF5 include: C:/Program Files/HDF_Group/HDF5/1.14.5/include;
[cmake] – HDF5 library: hdf5::hdf5-shared

BUILD FROM SOURCE - BUILDS BUT DOES NOT RUN

When I set HDF5_BUILD_FROM_SOURCE to ON, then it builds without errors, however the program terminates when I run it with a DLL not found error.

Error:

The program ‘[30692] farfield.exe’ has exited with code -1073741515 (0xc0000135).

CMakeBuild

[cmake] – hdf5_POPULATED: True
[cmake] – hdf5_BINARY_DIR: C:/Repos/farfield/build/_deps/hdf5-build
[cmake] – hdf5_SOURCE_DIR: C:/Repos/farfield/build/_deps/hdf5-src
[cmake] – HDF5 Found:
[cmake] – HDF5_VERSION:
[cmake] – HDF5_INCLUDE_DIRS: C:/Repos/farfield/build/_deps/hdf5-build/src
[cmake] – HDF5_LIBRARIES:
[cmake] – HDF5_DEFINITIONS:
[cmake] – HDF5 include:
[cmake] – HDF5 library: hdf5-shared

I’d appreciate any guidance, thank you!

Hey all, I included all the code above for reference, but really all I’m asking for is how to use CMakeLists.txt to build HDF5 for multithreading, I imagine this has been done before, just can’t find it online. Would appreciate if someone can point me in the right direction, thank you!

Hi, we have seen your post, but are a little confused. Do you mind providing us with some more information or a reproducer? Thank you very much!

Thank you gsong!

Please see a test repository here that reproduces my error:

===

Allow me to rephrase my question: I was reviewing this page here:

https://support.hdfgroup.org/documentation/hdf5/latest/_l_b_compiling.html

And it describes how to load HDF5 from an already built copy, which in my case was installed using the HDF5 installer MSI (Download HDF5® - The HDF Group - ensuring long-term access and usability of HDF data and supporting users of HDF technologies).

This installation works without difficulty, and is implemented with this code in my CMakeLists.txt which implements if HDF5_BUILD_FROM_SOURCE is set to OFF:

set (LIB_TYPE SHARED) # STATIC or SHARED
string(TOLOWER ${LIB_TYPE} SEARCH_TYPE)
find_package(HDF5 NAMES hdf5 COMPONENTS C ${SEARCH_TYPE})
set (LINK_LIBS ${LINK_LIBS} ${HDF5_C_${LIB_TYPE}_LIBRARY})

In my application, HDF5 loaded this way works perfectly in single threaded applications. However, if I attempt to call HDF5 from two threads simultaneously, even if they are completely isolated and do not reference the same file, I get an error.

I believe that this is caused by the way that HDF5 is built. I’d like to build it with flags that set the build to be thread safe. To do this, I’d like to build from the source repository, which is supposed to be this part here, implementing if HDF5_BUILD_FROM_SOURCE is set to ON:

FetchContent_Declare(
hdf5
GIT_REPOSITORY GitHub - HDFGroup/hdf5: Official HDF5® Library Repository
GIT_TAG hdf5_1.14.5
)
FetchContent_MakeAvailable(hdf5)
set (LINK_LIBS ${LINK_LIBS} hdf5-shared)
set (HDF5_INCLUDE_DIRS ${hdf5_BINARY_DIR}/src)

That way I can build with the appropriate configuration flags, these ones for example:

set(BUILD_TESTING OFF CACHE BOOL “” FORCE)
set(BUILD_SHARED_LIBS ON CACHE BOOL “Build shared libraries” FORCE)
set(BUILD_STATIC_LIBS OFF CACHE BOOL “Build static libraries” FORCE)
set(HDF5_ENABLE_THREADSAFE ON CACHE BOOL “” FORCE)
set(HDF5_BUILD_HL_LIB OFF CACHE BOOL “” FORCE) # Disable high-level APIs for thread safety
set(HDF5_BUILD_EXAMPLES OFF CACHE BOOL “” FORCE) # Disable examples
set(HDF5_BUILD_TOOLS OFF CACHE BOOL “” FORCE) # Disable tools

I don’t have the ability to control the build using the Windows MSI installer, it seems to install in a manner that is not thread safe. So my goal is to be able to use CMakeLists.txt to build from the source directory, that way I can use hdf5 in a multithreaded environment.

Gsong - I edited my reply above to include a test repository that reproduces my error

Hi @drmittelstein,

if I understand what you’re trying to do correctly, you should have no issues once the library is properly built with thread-safety enabled (though note that this won’t be concurrent, as the library currently still has a global lock). Since you’re encountering a DLL error at runtime, this may just be an issue of needing to set the PATH environment variable correctly, depending on where the built HDF5 gets installed to (and assuming that HDF5 is the missing DLL, which makes sense for the HDF5_BUILD_FROM_SOURCE=OFF/ON situation). Otherwise, you may need to use some utility to see which DLL is missing.

Thank you @jhenderson,

you should have no issues once the library is properly built with thread-safety enabled

That is exactly correct, I just need instruction on how to build hdf5 from source using CMakeLists.txt. Using the Windows installer and find_package works well to incorporate the version of hdf5 that is not thread safe. However, I don’t know how to correctly install and include the appropriate library linkages when building from source, which I believe is what I need to do in order to build the thread safe version of hdf5. Assistance is appreciated!

though note that this won’t be concurrent, as the library currently still has a global lock

In the context of the example that I included, can you explain what you mean by this? Why would there need to be a global lock if there are multiple separate files?

For example, in the “mutex” branch of my test repository, I can “solve” this problem by using mutex, but that seems to be an unnecessary compromise in performance.

I just need instruction on how to build hdf5 from source using CMakeLists.txt

Building HDF5 from source this way (using FetchContent) is perhaps a slightly more advanced way, where one might usually just build HDF5 from source using the directions at hdf5/release_docs/INSTALL_CMake.txt at develop · HDFGroup/hdf5 · GitHub and then use find_package to find the installed HDF5 in their CMakeLists.txt. The FetchContent approach is likely either not installing HDF5 somewhere or is installing it to a non-system-wide directory by default, meaning you’d need to set the PATH environment variable on Windows so that the installation directory is available when DLLs are searched for.

Why would there need to be a global lock if there are multiple separate files?

HDF5 has several variables, data structures, etc. that it needs to protect when multiple threads are involved. Notably, this includes things like HDF5’s ID management code, which would be affected even when you have separate threads writing to different files. When the library is configured to be thread-safe, this currently just enables a global lock that only allows one thread inside an HDF5 API routine at a time. There is ongoing work toward enabling concurrent multi-threading in HDF5, but this is something that isn’t currently possible in HDF5.

Thank you @jhenderson, that is unfortunate. Do you know of other dataframe projects similar to HDF5 that do have multi-threading capability?

The FetchContent approach is likely either not installing HDF5 somewhere or is installing it to a non-system-wide directory by default, meaning you’d need to set the PATH environment variable on Windows so that the installation directory is available when DLLs are searched for.

Yes, this is what is happening, the CMakeList.txt is building to specific directories within the dependency, which I believe is expected default behavior for CMakeList

[cmake] – hdf5_POPULATED: True
[cmake] – hdf5_BINARY_DIR: C:/Repos/farfield/build/_deps/hdf5-build
[cmake] – hdf5_SOURCE_DIR: C:/Repos/farfield/build/_deps/hdf5-src
[cmake] – HDF5 Found:
[cmake] – HDF5_VERSION:
[cmake] – HDF5_INCLUDE_DIRS: C:/Repos/farfield/build/_deps/hdf5-build/src
[cmake] – HDF5_LIBRARIES:
[cmake] – HDF5_DEFINITIONS:
[cmake] – HDF5 include:
[cmake] – HDF5 library: hdf5-shared

I tried manually searching the directory for the DLL, but couldn’t find it. I think I’m just doing CMakeLists wrong, do you or does anyone else have any insight into any syntax errors that I’m making or how to fix this? I do need to use CMakeLists because I need the application to be able to be deployed to a system without having the user to install pre-requisite software.

In my hdf5-examples fork I have a branch that does build hdf5 inside the examples, see:

The code support is in the config/cmake/HDF5ExampleMacros.cmake file; there are two macros involved, HDF5_SUPPORT and EXTERNAL_HDF5_LIBRARY.

Thank you. This seems to be what I want, this builds hdf5 from the github correct?

How would I incorporate this into my CMakeLists.txt workflow?