H5dump and compound types


#1

All,

This probably has a simple answer, but I’m apparently so bad at HDF5 (thanks to a life of simple netCDF-4 files) that I can’t figure it out.

Namely, I have a file that has this inside it:

$ h5dump -H -p -d SMAP_data/theRest RIM_SMAP_V1_20151203_10_JPL_V4_CMORPH_V1.h5 | less
HDF5 "RIM_SMAP_V1_20151203_10_JPL_V4_CMORPH_V1.h5" {
DATASET "SMAP_data/theRest" {
   DATATYPE  H5T_COMPOUND {
      H5T_ARRAY { [76][1624] H5T_IEEE_F64LE } "anc_dir";
      H5T_ARRAY { [76][1624] H5T_IEEE_F64LE } "anc_swh";
      H5T_ARRAY { [76][1624] H5T_IEEE_F64LE } "azi_aft";
      H5T_ARRAY { [76][1624] H5T_IEEE_F64LE } "azi_fore";
      H5T_ARRAY { [76][1624] H5T_IEEE_F64LE } "inc_aft";
      H5T_ARRAY { [76][1624] H5T_IEEE_F64LE } "inc_fore";
      H5T_ARRAY { [76][1624] H5T_IEEE_F64LE } "num_ambiguities";
      H5T_ARRAY { [76][1624] H5T_IEEE_F64LE } "Q_Flag";
      H5T_ARRAY { [6][1624] H5T_IEEE_F64LE } "row_time";
      H5T_ARRAY { [76][1624][4] H5T_IEEE_F64LE } "smap_ambiguity_dir";
      H5T_ARRAY { [76][1624][4] H5T_IEEE_F64LE } "smap_ambiguity_spd";
      H5T_ARRAY { [76][1624] H5T_IEEE_F64LE } "smap_high_dir";
      H5T_ARRAY { [76][1624] H5T_IEEE_F64LE } "smap_high_dir_smooth";
      H5T_ARRAY { [76][1624] H5T_IEEE_F64LE } "smap_high_spd";
      H5T_ARRAY { [76][1624] H5T_IEEE_F64LE } "smap_sss_uncertainty";
   }
   DATASPACE  SIMPLE { ( 1 ) / ( 1 ) }
   STORAGE_LAYOUT {
      CONTIGUOUS
      SIZE 19825792
      OFFSET 58326232
   }
   FILTERS {
      NONE
   }
   FILLVALUE {
      FILL_TIME H5D_FILL_TIME_IFSET
      VALUE  H5D_FILL_VALUE_DEFAULT
   }
   ALLOCATION_TIME {
      H5D_ALLOC_TIME_LATE
   }
}
}

What I’d like to do is dump out the values of, say, Q_Flag but I can’t seem to get the h5dump command right to do it. I thought maybe:

$ h5dump  -d SMAP_data/theRest/Q_Flag RIM_SMAP_V1_20151203_10_JPL_V4_CMORPH_V1.h5
HDF5 "RIM_SMAP_V1_20151203_10_JPL_V4_CMORPH_V1.h5" {h5dump error: unable to get link info from "SMAP_data/theRest/Q_Flag"

}

But no. Any help on what I am missing here? It must be possible…right?

Thanks for any help,
Matt


#2

Matt, I’m afraid there is no such option for h5dump at the moment. That said there are fine choices in Python, Julia, R, C++, etc., whose HDF5 modules can do it with a modest effort on your part. I’m sure the designers of this compound datatype had good reasons and the best intentions, but the needs of data consumers weren’t very high on the priority list. G.


#3

Hi Matt,

correction: I just realised you want to go the other way – will upload the correct example tomorrow – cheers.


WARNING: not the right example: This example goes from CSV to HDF5


Here is an H5CPP example to convert HDF5 pod structs to CSV – with good performance. Adjust it to your needs, or if you need help, drop me a line.


/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include "csv.h"
// data structure include file: `struct.h` must precede `generated.h`, as the latter
// depends on the definitions in the former
#include "struct.h"

#include <h5cpp/core>      // has handle + type descriptors
// sandwiched: as `h5cpp/io` depends on `generated.h` which needs `h5cpp/core`
	#include "generated.h" // uses type descriptors
#include <h5cpp/io>        // uses generated.h + core 

int main(){

	// create HDF5 container
	h5::fd_t fd = h5::create("output.h5",H5F_ACC_TRUNC);
	// create dataset   
	// chunk size is unrealistically small, usually you would set this such that ~= 1MB or an ethernet jumbo frame size
	h5::ds_t ds = h5::create<input_t>(fd,  "simple approach/dataset.csv",
				 h5::max_dims{H5S_UNLIMITED}, h5::chunk{10} | h5::gzip{9} );
	// `h5::ds_t` handle is seamlessly cast to `h5::pt_t` packet table handle, this could have been done in single step
	// but we need `h5::ds_t` handle to add attributes
	h5::pt_t pt = ds;
	// attributes may be added to `h5::ds_t` handle
	ds["data set"] = "monroe-county-crash-data2003-to-2015.csv";
	ds["cvs parser"] = "https://github.com/ben-strasser/fast-cpp-csv-parser"; // thank you!

	constexpr unsigned N_COLS = 5;
	io::CSVReader<N_COLS> in("input.csv"); // number of cols may be less, than total columns in a row, we're to read only 5
	in.read_header(io::ignore_extra_column, "Master Record Number", "Hour", "Reported_Location","Latitude","Longitude");
	input_t row;                           // buffer to read line by line
	char* ptr;      // indirection, as `read_row` doesn't take array directly
	while(in.read_row(row.MasterRecordNumber, row.Hour, ptr, row.Latitude, row.Longitude)){
		strncpy(row.ReportedLocation, ptr, STR_ARRAY_SIZE); // defined in struct.h
		h5::append(pt, row);
		std::cout << std::string(ptr) << "\n";
	}
	// RAII closes all allocated resources
}

This part must be adapted to your needs:


#ifndef  CSV2H5_H 
#define  CSV2H5_H

/// Size, in bytes, of the fixed-length `ReportedLocation` text field.
constexpr int STR_ARRAY_SIZE{20};

/**
 * C++ representation of one record, kept as a plain POD struct.
 * Member order and types define the in-memory layout, so they must stay
 * in sync with any HDF5 compound type description built from this struct.
 */
struct input_t {
	long     MasterRecordNumber;
	unsigned Hour;
	double   Latitude;
	double   Longitude;
	char     ReportedLocation[STR_ARRAY_SIZE];  ///< fixed-size character buffer
};
#endif

If you use my LLVM tool, you can generate this file; otherwise you can write it by hand:

#ifndef H5CPP_GUARD_GCBLl
#define H5CPP_GUARD_GCBLl

namespace h5{
    // Specialization of `register_struct` for `input_t`: builds the HDF5 COMPOUND
    // datatype that mirrors the struct's in-memory layout and returns its handle.
    template<> inline hid_t register_struct<input_t>(){
        // One-dimensional array of 20 native chars for the `ReportedLocation` member.
        hsize_t location_dims[] = {20};
        hid_t location_type = H5Tarray_create(H5T_NATIVE_CHAR, 1, location_dims);

        // One member per struct field, placed at the field's offset within `input_t`.
        hid_t compound_type = H5Tcreate(H5T_COMPOUND, sizeof (input_t));
        H5Tinsert(compound_type, "MasterRecordNumber",
                  HOFFSET(input_t,MasterRecordNumber), H5T_NATIVE_LONG);
        H5Tinsert(compound_type, "Hour",
                  HOFFSET(input_t,Hour), H5T_NATIVE_UINT);
        H5Tinsert(compound_type, "Latitude",
                  HOFFSET(input_t,Latitude), H5T_NATIVE_DOUBLE);
        H5Tinsert(compound_type, "Longitude",
                  HOFFSET(input_t,Longitude), H5T_NATIVE_DOUBLE);
        H5Tinsert(compound_type, "ReportedLocation",
                  HOFFSET(input_t,ReportedLocation), location_type);

        // The helper array type is closed here to prevent resource leakage.
        H5Tclose(location_type);

        // When this code is used standalone (outside the h5cpp framework), the
        // caller must close the returned handle itself: H5Tclose(<returned id>).
        return compound_type;
    }
}
H5CPP_REGISTER_STRUCT(input_t);

#endif