How to create scalar variable length string


#1

Using HDF.PInvoke.1.10.612, I can successfully create a scalar fixed-length string like this:

/// <summary>
/// Creates d:\testfile.h5 (opened with H5F.ACC_TRUNC) and writes
/// <paramref name="s"/> to it as a scalar, fixed-length string attribute
/// named "fixed_len_str".
/// </summary>
/// <param name="s">Text to store; it is encoded as ASCII with a trailing NUL byte.</param>
void CreateFixedLenString(string s)
{
	long fileId = H5F.create(@"d:\testfile.h5", H5F.ACC_TRUNC);
	long spaceId = H5S.create(H5S.class_t.SCALAR);

	// Copy of the C string type, sized for the characters plus the terminator.
	long typeId = H5T.copy(H5T.C_S1);
	int sizeStatus = H5T.set_size(typeId, (IntPtr)(s.Length + 1));

	long attrId = H5A.create(fileId, "fixed_len_str", typeId, spaceId);

	try
	{
		byte[] payload = Encoding.ASCII.GetBytes(s + "\0");

		unsafe
		{
			// Pin the managed array so its address stays valid during the native call.
			fixed (byte* pinned = payload)
			{
				int status = H5A.write(attrId, typeId, (IntPtr)pinned);
				status.Dump(); // LINQPad output helper
			}
		}
	}
	catch (Exception error)
	{
		error.Dump(); // LINQPad output helper
	}
	finally
	{
		// Release every identifier opened above, attribute first and file last.
		H5A.close(attrId);
		H5T.close(typeId);
		H5S.close(spaceId);
		H5F.close(fileId);
	}
}

But if I try pretty much the same thing to create a variable-length string, I end up with an access violation:

// Attempts to write `s` as a scalar variable-length string attribute named
// "variable_len_str" on a newly created d:\testfile.h5.
// As posted, this version is reported to fail with an access violation.
void CreateVariableLenString(string s)
{
	long hf = H5F.create(@"d:\testfile.h5", H5F.ACC_TRUNC);
	long hs = H5S.create(H5S.class_t.SCALAR);
    // The only difference from the fixed-length version: a variable-length string type.
    // NOTE(review): the surrounding ** looks like forum emphasis markup, not C# - confirm.
	long ht = **H5T.create(H5T.class_t.STRING, H5T.VARIABLE);**
	long ha = H5A.create(hf, "variable_len_str", ht, hs);

	try
	{
		unsafe
		{
			// ASCII bytes of the string followed by a NUL terminator.
			var bytes = Encoding.ASCII.GetBytes(s + '\0');

			fixed (byte* b = bytes)
			{
				// Passes the address of the character bytes themselves. Per the replies
				// in this thread, a variable-length write expects a buffer of pointers.
				int result = H5A.write(ha, ht, new IntPtr(b));
				result.Dump(); // LinqPad helper
			}
		}
	}
	catch (Exception ex)
	{
		ex.Dump(); // LinqPad helper
	}
	finally
	{
		// Close the identifiers opened above: attribute, type, dataspace, then file.
		H5A.close(ha);
		H5T.close(ht);
		H5S.close(hs);
		H5F.close(hf);
	}
}

Any ideas what I’m doing wrong?


Write variable length UTF8 string attribute with Fortran API
#2

I found a solution, which looks like this:

/// <summary>
/// Writes <paramref name="s"/> as a scalar, fixed-length string attribute named
/// "fixed_len_str" on the object identified by <paramref name="hf"/>.
/// </summary>
/// <param name="hf">Identifier the attribute is created on; it is not closed here.</param>
/// <param name="s">Text to store; it is encoded as ASCII without a terminator.</param>
void CreateFixedLenString(long hf, string s)
{
	long spaceId = H5S.create(H5S.class_t.SCALAR);

	// Copy of the C string type, sized to exactly the number of characters.
	long typeId = H5T.copy(H5T.C_S1);
	int sizeStatus = H5T.set_size(typeId, (IntPtr)s.Length);

	long attrId = H5A.create(hf, "fixed_len_str", typeId, spaceId);

	try
	{
		byte[] payload = Encoding.ASCII.GetBytes(s);

		unsafe
		{
			// A fixed-length string is written from a plain contiguous byte buffer.
			fixed (void* pinned = payload)
			{
				int status = H5A.write(attrId, typeId, (IntPtr)pinned);
				status.Dump();
			}
		}
	}
	catch (Exception error)
	{
		error.Dump();
	}
	finally
	{
		// Release only what this method opened; the caller owns hf.
		H5A.close(attrId);
		H5T.close(typeId);
		H5S.close(spaceId);
	}
}

/// <summary>
/// Writes <paramref name="s"/> as a scalar variable-length string attribute named
/// "variable_len_str" on the object identified by <paramref name="hf"/>.
/// </summary>
/// <param name="hf">Identifier the attribute is created on; it is not closed here.</param>
/// <param name="s">Text to store; it is encoded as ASCII with a trailing NUL byte.</param>
/// <remarks>
/// For a variable-length string type, the write buffer holds pointers to the
/// strings rather than the characters themselves, so even a scalar value is
/// passed as a one-element pointer array.
/// </remarks>
void CreateVariableLenString(long hf, string s)
{
	long hs = H5S.create(H5S.class_t.SCALAR);
	long ht = H5T.create(H5T.class_t.STRING, H5T.VARIABLE);
	long ha = H5A.create(hf, "variable_len_str", ht, hs);

	try
	{
		unsafe
		{
			// The element is handed to the native library as a C string, so the buffer
			// must carry its own terminating NUL; a managed byte[] does not have one,
			// and without it the library would read past the end of the array.
			fixed (void* fixedString = Encoding.ASCII.GetBytes(s + '\0'))
			{
				// One-element array of pointers: the buffer H5A.write actually receives.
				var stringArray = new IntPtr[1] { new IntPtr(fixedString) };

				fixed (void* fixedStringArray = stringArray)
				{
					int result = H5A.write(ha, ht, new IntPtr(fixedStringArray));
					result.Dump();
				}
			}
		}
	}
	catch (Exception ex)
	{
		ex.Dump();
	}
	finally
	{
		// Release only what this method opened; the caller owns hf.
		H5A.close(ha);
		H5T.close(ht);
		H5S.close(hs);
	}
}

This works, but it seems odd. Why do I need an extra level of indirection for a variable-length string compared with a fixed-length string?


#3

The experts should chime in, but I believe this is a quirky corner case in the C-API.

To H5[A,D]write a dataset of fixed-size strings you supply a contiguous buffer of bytes. If the underlying dataspace is scalar, this “collapses” nicely into a byte buffer.

To H5[A,D]write a dataset of variable-length strings, you supply a buffer of pointers (a ragged array of null-terminated byte sequences). If the underlying dataspace is scalar, this currently “collapses” into a single-element pointer array. I don’t know what kind of complications would arise from changing this purely for the sake of aesthetics.

G.


#4

Ok thanks. Is that documented anywhere?


#5

See this GitHub issue. G.