select hyperslab of VL data


#1

Hello,

Question about hyperslab with VL data.
This next program using H5Sselect_elements works as I expected:

#include "hdf5/hdf5.h"

struct s_data {
	uint64_t b;
	uint16_t a;
};

struct ext_data3 {
	uint64_t a;
	uint32_t b;
	int16_t nelem;
	struct s_data data[3];
};

struct ext_data {
	uint64_t a;
	uint32_t b;
	int16_t nelem;
	struct s_data data[];
};


int main()
{
	hid_t memspace_id, memtype_id;
    	hsize_t dims_mem = 1;
	hsize_t dims = 3;
	hsize_t coord;
	struct ext_data3 d3;
	hid_t stm, dataset, filespace_id, filetype_id;
	hvl_t wdata;

	dims_mem = 1;
	memspace_id = H5Screate_simple(1, &dims_mem, NULL);
	memtype_id = H5Tvlen_create(H5T_C_S1);

	filespace_id = H5Screate_simple(1, &dims, NULL);
	filetype_id = H5Tvlen_create(H5T_C_S1);
	stm = H5Fcreate("varsize.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
	dataset = H5Dcreate(stm, "event_prod", filetype_id, filespace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
	printf("\t\t*** WRITE DATA ***\n");
	for (hsize_t i = 0; i < dims; i++) {
		struct ext_data *d;
		size_t sz;
		hsize_t count;

		d = (struct ext_data *)&d3;
		d3.a = i;
		d3.b = i + 1;
		d3.nelem = 1;
		d3.data[0].a = 2 * i;
		d3.data[0].b = 2 * i + 1;
		if (0 == i % 2) {
			d3.data[1].a = 2 * i + 2;
			d3.data[1].b = 2 * i + 3;
			d3.nelem = 2;
		}
		printf("\tpoint # %llu\n", i);
		printf("ext_data a = %ld, ext_data b = %d, ext data nelem = %d\n", d3.a, d3.b, d3.nelem);
		for (int16_t k = 0; k < d3.nelem; k++)
			printf("str_data a = %d, str_data b = %ld\n", d3.data[k].a, d3.data[k].b);
		sz = sizeof(*d) + d3.nelem * sizeof(struct s_data);
		wdata.p = &d3;
		wdata.len = sz;
		printf("sizeof d = %lu, sz = %lu\n", sizeof(*d), sz);
		count = 1;
		coord = i;
		H5Sselect_elements(filespace_id, H5S_SELECT_SET, count, &coord);
		H5Dwrite(dataset, memtype_id, memspace_id, filespace_id, H5P_DEFAULT, &wdata);
	}
	H5Dclose(dataset);
	H5Fclose(stm);
	H5Tclose(filetype_id);
	H5Sclose(filespace_id);
	H5Tclose(memtype_id);
	H5Sclose(memspace_id);

/* Read */
	hssize_t npoints;
	hvl_t rdata;
	hsize_t req_size;

	stm = H5Fopen("varsize.h5", H5F_ACC_RDONLY, H5P_DEFAULT);
	dataset = H5Dopen(stm, "/event_prod", H5P_DEFAULT);
	filespace_id = H5Dget_space(dataset);
	npoints = H5Sget_simple_extent_npoints(filespace_id);
	filetype_id = H5Tget_native_type(H5Dget_type(dataset), H5T_DIR_DEFAULT);

	memspace_id = H5Scopy(filespace_id);
	memtype_id = H5Tcopy(filetype_id);
	coord = 0;
	H5Sselect_elements(memspace_id, H5S_SELECT_SET, 1, &coord);
	printf("\t\t*** READ DATA ***\n");
	H5Dvlen_get_buf_size(dataset, filetype_id, filespace_id, &req_size);
	printf("requared size for all dataset = %llu\n", req_size);
	for (hssize_t i = 0; i < npoints; i++) {
		struct ext_data *d;

		printf("\tpoint # %llu\n", i);
		coord = (hsize_t)i;
		H5Sselect_elements(filespace_id, H5S_SELECT_SET, 1, &coord);
		H5Dvlen_get_buf_size(dataset, filetype_id, filespace_id, &req_size);
		printf("requared size for one element = %llu\n", req_size);
		H5Dread(dataset, memtype_id, memspace_id, filespace_id, H5P_DEFAULT, &rdata);
		d = rdata.p;
		printf("ext_data a = %ld, ext_data b = %d, ext data nelem = %d\n", d->a, d->b, d->nelem);
		for (int16_t j = 0; j < d->nelem; j++)
			printf("str_data a = %d, str_data b = %ld\n", (d->data[j]).a, (d->data[j]).b);
		H5Dvlen_reclaim(memtype_id, memspace_id, H5P_DEFAULT, &rdata);
	}
	H5Tclose(memtype_id);
	H5Sclose(memspace_id);
	H5Tclose(filetype_id);
	H5Sclose(filespace_id);
	H5Dclose(dataset);
	H5Fclose(stm);
	return (0);
}

But when I tried to use hyperslab:

#include "hdf5/hdf5.h"

struct s_data {
	uint64_t b;
	uint16_t a;
};

struct ext_data3 {
	uint64_t a;
	uint32_t b;
	int16_t nelem;
	struct s_data data[3];
};

struct ext_data {
	uint64_t a;
	uint32_t b;
	int16_t nelem;
	struct s_data data[];
};


int main()
{
	hid_t memspace_id, memtype_id;
	hsize_t dims = 3;
	hsize_t coord;
	hsize_t hyperslab_size = 1;
	struct ext_data3 d3[hyperslab_size];
	hid_t stm, dataset, filespace_id, filetype_id;
	hvl_t wdata[hyperslab_size];
	hsize_t pushposition;

	memspace_id = H5Screate_simple(1, &hyperslab_size, NULL);
	memtype_id = H5Tvlen_create(H5T_C_S1);

	filespace_id = H5Screate_simple(1, &dims, NULL);
	filetype_id = H5Tvlen_create(H5T_C_S1);
	stm = H5Fcreate("varsize.h5", H5F_ACC_TRUNC, H5P_DEFAULT,     H5P_DEFAULT);
	dataset = H5Dcreate(stm, "event_prod", filetype_id, filespace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
	printf("\t\t*** WRITE DATA ***\n");
	hsize_t chunk_quotient = dims / hyperslab_size;
	hsize_t chunk_remainder = dims % hyperslab_size;
	printf("quotient = %llu\n", chunk_quotient);
	printf("remainder = %llu\n", chunk_remainder);
	pushposition = 0;
	for (hsize_t i = 0; i < chunk_quotient; i++) {
		struct ext_data *d;
		size_t sz;
		hsize_t offset, count;

		printf("chunk # %llu\n", i);
		d = (struct ext_data *)d3;
		for (hsize_t j = 0; j < hyperslab_size; j++) {
			d3[j].a = i * hyperslab_size + j;
			d3[j].b = i * hyperslab_size + j + 1;
			d3[j].nelem = 1;
			d3[j].data[0].a = 2 * (i * hyperslab_size + j);
			d3[j].data[0].b = 2 * (i * hyperslab_size + j) + 1;
			if (0 == (i * hyperslab_size + j) % 2) {
				d3[j].data[1].a = 2 * (i * hyperslab_size + j) + 2;
				d3[j].data[1].b = 2 * (i * hyperslab_size + j) + 3;
				d3[j].nelem = 2;
			}
			printf("\tpoint # %llu\n", i * hyperslab_size + j);
			printf("ext_data a = %ld, ext_data b = %d, ext data nelem = %d\n", d3[j].a, d3[j].b, d3[j].nelem);
			for (int16_t k = 0; k < d3[j].nelem; k++)
				printf("str_data a = %d, str_data b = %ld\n", d3[j].data[k].a, d3[j].data[k].b);
			sz = sizeof(*d) + d3[j].nelem * sizeof(struct s_data);
			wdata[j].p = d3 + j;
			wdata[j].len = sz;
			printf("sizeof d = %lu, sz = %lu\n", sizeof(*d), sz);
		}
		count = 0;
		for (hsize_t j = 0; j < hyperslab_size; j++)
			count += wdata[j].len;
		offset = 0;
		H5Sselect_hyperslab(memspace_id, H5S_SELECT_SET, &offset, NULL, &count, NULL);
		offset = pushposition;
		printf("offset = %llu, count = %llu\n", offset, count);
		H5Sselect_hyperslab(filespace_id, H5S_SELECT_SET, &offset, NULL, &count, NULL);
    	H5Dwrite(dataset, memtype_id, memspace_id, filespace_id, H5P_DEFAULT, wdata);
		pushposition += count;
	}
	H5Dclose(dataset);
	H5Fclose(stm);
	H5Tclose(filetype_id);
	H5Sclose(filespace_id);
	H5Tclose(memtype_id);
	H5Sclose(memspace_id);
	return (0);
}

I have an error:
*** WRITE DATA ***
quotient = 3
remainder = 0
chunk # 0
point # 0
ext_data a = 0, ext_data b = 1, ext data nelem = 2
str_data a = 0, str_data b = 1
str_data a = 2, str_data b = 3
sizeof d = 16, sz = 48
offset = 0, count = 48
HDF5-DIAG: Error detected in HDF5 (1.12.1) thread 0:
#000: …/…/…/…/src/hdf5-1.12.1/src/H5Dio.c line 291 in H5Dwrite(): can’t write data
major: Dataset
minor: Write failed
#001: …/…/…/…/src/hdf5-1.12.1/src/H5VLcallback.c line 2113 in H5VL_dataset_write(): dataset write failed
major: Virtual Object Layer
minor: Write failed
#002: …/…/…/…/src/hdf5-1.12.1/src/H5VLcallback.c line 2080 in H5VL__dataset_write(): dataset write failed
major: Virtual Object Layer
minor: Write failed
#003: …/…/…/…/src/hdf5-1.12.1/src/H5VLnative_dataset.c line 198 in H5VL__native_dataset_write(): could not get a validated dataspace from mem_space_id
major: Invalid arguments to routine
minor: Bad value
#004: …/…/…/…/src/hdf5-1.12.1/src/H5S.c line 266 in H5S_get_validated_dataspace(): selection + offset not within extent
major: Dataspace
minor: Out of range

How to select hyperslab correctly?

Thanks and best,
Nazar


#3

Hi @nazar19681980,

Would you mind to attach HDF5 file varsize.h5 (alternatively, a screenshot of dataset event_prod opened in HDFView) and indicate which data you would like to read from dataset event_prod using an hyperslab? With this info, it will be easier to help!


#4

Unfortunately, I can’t attach files, because I see message “new users can not upload attachments”. I’ve installed HDFView-2.11 right now, but I don’t see anything in any h5 file (I see “groupsize = 0, number of attributes = 0” even on the SDS.h5 file that was created by h5_write.c example). Maybe it’s because wrong version of hdfview, I don’t know. Sorry, I don’t have enough time to understand what’s wrong with my HDFview write now.

The first program works well. I can write the data from “wdata” to the dataset using H5Sselect_elemets, and I can read it after reopen the file. Here is the output from the first program:

                 *** WRITE DATA ***
        point # 0
ext_data a = 0, ext_data b = 1, ext data nelem = 2
str_data a = 0, str_data b = 1
str_data a = 2, str_data b = 3
sizeof d = 16, sz = 48
        point # 1
ext_data a = 1, ext_data b = 2, ext data nelem = 1
str_data a = 2, str_data b = 3
sizeof d = 16, sz = 32
        point # 2
ext_data a = 2, ext_data b = 3, ext data nelem = 2
str_data a = 4, str_data b = 5
str_data a = 6, str_data b = 7
sizeof d = 16, sz = 48
                *** READ DATA ***
requared size for all dataset = 128
        point # 0
requared size for one element = 48
ext_data a = 0, ext_data b = 1, ext data nelem = 2
str_data a = 0, str_data b = 1
str_data a = 2, str_data b = 3
        point # 1
requared size for one element = 32
ext_data a = 1, ext_data b = 2, ext data nelem = 1
str_data a = 2, str_data b = 3
        point # 2
requared size for one element = 48
ext_data a = 2, ext_data b = 3, ext data nelem = 2
str_data a = 4, str_data b = 5
str_data a = 6, str_data b = 7

But in the second program I have problems with writing the data to the dataset. The output error “selection + offset not within extent” tells me that something wrong with my H5Sselect_hyperslab, as I understand, but I don’t know what exactly.

(If you give me some extra days, I will fix my problems with HDFview, because now I’m really very busy).

Best,
Nazar


#5

Does h5ls or h5dump report something?

We encountered a similar problem recently using HDFView 2.11 on a new Linux system (Ubuntu)–HDF5 file appear to be empty. I think the issue was that HDFView 2.11 doesn’t play well with HDF5 1.10. Our SA solved it by grabbing HDFView 2.11 from a CentOS 7 system. I’m not sure how that re-pointed it to 1.8.

There is a much newer version of HDFView, 3.X, but it has a weird bug which causes images of slices of arrays to be displayed incorrectly, but it only occurs if you slice the array in a certain way. (The bug has been logged as HDFVIEW-197 by The HDF Group.)


#6

Here is the report of h5dump about the “varsize.h5” file created by first program:

HDF5 "./varsize.h5" {
GROUP "/" {
   DATASET "event_prod" {
      DATATYPE  H5T_VLEN { H5T_STRING {
         STRSIZE 1;
         STRPAD H5T_STR_NULLTERM;
         CSET H5T_CSET_ASCII;
         CTYPE H5T_C_S1;
      }}
      DATASPACE  SIMPLE { ( 3 ) / ( 3 ) }
      DATA {
      (0): ("", "", "", "", "", "", "", "", "\001", "", "", "", "\002", "", "", "", "\001", "", "", "", "", "", "", "", "", "", "D", "\037", "\005", "V", "", "", "\003", "", "", "", "", "", "", "", "\002", "", "", "", "", "", "", ""),
      (1): ("\001", "", "", "", "", "", "", "", "\002", "", "", "", "\001", "", "", "", "\003", "", "", "", "", "", "", "", "\002", "", "D", "\037", "\005", "V", "", ""),
      (2): ("\002", "", "", "", "", "", "", "", "\003", "", "", "", "\002", "", "", "", "\005", "", "", "", "", "", "", "", "\004", "", "D", "\037", "\005", "V", "", "", "\007", "", "", "", "", "", "", "", "\006", "", "", "", "", "", "", "")
      }
   }
}
}

#7

Nazar, I think you should try to separate two issues:

  1. Defining the correct selections (point or hyperslab) has nothing to do with a dataset’s element type. Just work out your selection logic with integer datasets, and put that to one side.
  2. I’m concerned about your datatypes. Your in-file and in-memory types are null-terminated, variable-length strings. The in-memory representation of such a string is char* or char** for a bunch of them. For strings, and strings only, the HDF5 library does the conversion to hvl_t behind the scenes; no need for you to get involved. That aside, what makes you think that your pointer initialization a la wdata.p = &d3; will result in a null-terminated byte sequence? I’m sure there will be a \0 somewhere in user space, but maybe not where you expect it.

Best, G.


#8

Nazar, I believe I understand what you are trying to achieve. You are trying to dump string representations of your structs (ext_data, ext_data3), right? If so, the approach you are taking 1) won’t work and 2) even if it worked, would be wasteful. I described the reason for 1) in my previous reply. The in-memory representation can not be “casted” into a string representation, and would be processor architecture dependent. 2) would be wasteful because you’d be adding conversion overhead (binary -> string, string -> binary) to all I/O operations.

Consider using a compound datatype (see section 6.5.2 of the Users Guide)!

G.


#9

Dear gheber,

Thanks for the answer. I’m not trying to dump string representations of ext_data struct. The ext_data3 structure contains the data that i’m trying to write to the dataset without any treatment of content (just byte-to-byte like working with unix file). I use ext_data structure and ext_data3.nelem (ext_data3.nelem defines number of used elements from the array of s_data structure) to measure the size of the data that should be written to the dataset (program 1, line 68: sz = sizeof(*d) + d3.nelem * sizeof(struct s_data);). Doing wdata.p = &d3 i just tell wdata where is the data that should be written to the dataset. I don’t cast anything. wdata doesn’t know what is inside wdata.p. There is just some amount of byte information. You can see from my first program that I can write the data from ext_data3 structure one by one using selection one element of file space (by H5Sselect_elements). After that I can read all the data knowing how to treat the information stored in the dataset (reading one by one using H5Sselect_elements).
But when i try to use H5Sselect_hyperslab, I have an error:

   #003: ../../../../src/hdf5-1.12.1/src/H5VLnative_dataset.c line 200 in H5VL__native_dataset_write(): could not get a validated dataspace from file_space_id
    major: Invalid arguments to routine
    minor: Bad value

  #004: ../../../../src/hdf5-1.12.1/src/H5S.c line 266 in H5S_get_validated_dataspace(): selection + offset not within extent
    major: Dataspace
    minor: Out of range

P.S.: Maybe you misunderstood me because I used H5T_C_S1. I’m not gonna do anything with C-strings. I want to work with my data like with some amount of byte information. I started to work with hdf5 recently, maybe i just used wrong datatype when I created variable length type? But changing H5T_C_S1 to H5T_NATIVE_CHAR did not affect, program 1 works in the same way and program 2 still doesn’t work.

I’ll do some tests. Maybe I can give some more information later.

Best regards,
Nazar


#10

Hello again!

I found the problem with hyperslab! I used offset and count in H5Sselect_hyperslab in a wrong way. Now, as i see in documentation, count is the count of BLOCK, but i used count as a size in bytes. So, to make my program 2 work I should do count = hyperslab_size.

Thanks everyone for help. Best,
Nazar