Dear HDF5 experts:
So I have a server that has been running fine for about a year with no
problem. The server receives TCP/SSL data from clients, verifies their
validity, and then opens them in memory, verifies the contents and
finally saves them.
The error I got is the following:
HDF5-DIAG: Error detected in HDF5 (1.8.16) thread 0:
#000: /path/to/hdf5/CMake-hdf5-1.8.16/hdf5-1.8.16/src/H5A.c line 731
in H5Aget_type(): unable to register datatype
major: Object atom
minor: Unable to register new atom
#001: /path/to/hdf5/CMake-hdf5-1.8.16/hdf5-1.8.16/src/H5I.c line 888
in H5I_register(): can't remove ID from available ID list
major: Object atom
minor: Unable to remove object
HDF5-DIAG: Error detected in HDF5 (1.8.16) thread 0:
#000: /path/to/hdf5/CMake-hdf5-1.8.16/hdf5-1.8.16/src/H5T.c line 1839
in H5Tget_class(): not a
datatype
major: Invalid arguments to routine
minor: Inappropriate type
HDF5-DIAG: Error detected in HDF5 (1.8.16) thread 0:
#000: /path/to/hdf5/CMake-hdf5-1.8.16/hdf5-1.8.16/src/H5A.c line 731
in H5Aget_type(): unable to register datatype
major: Object atom
minor: Unable to register new atom
#001: /path/to/hdf5/CMake-hdf5-1.8.16/hdf5-1.8.16/src/H5I.c line 888
in H5I_register(): can't remove ID from available ID list
major: Object atom
minor: Unable to remove object
HDF5-DIAG: Error detected in HDF5 (1.8.16) thread 0:
#000: /path/to/hdf5/CMake-hdf5-1.8.16/hdf5-1.8.16/src/H5T.c line 1839
in H5Tget_class(): not a
datatype
major: Invalid arguments to routine
minor: Inappropriate type
Here are some facts:
1. I use Debian Jessie: "Linux 3.16.0-4-amd64 #1 SMP Debian
3.16.7-ckt25-2+deb8u3 (2016-07-02) x86_64 GNU/Linux"
2. This error is not reproducible. The files did not get saved because
of this, but after restarting the server, the same files were
accepted with no errors.
3. Nothing was changed in the program for months (about 3-4 months,
since I updated to 1.8.16).
4. Nothing about the data being transferred was changed other than data
values. Data format is always the same, and attributes always have
the same types.
5. The first error happened once, and then all HDF5 calls failed one
after the other... no success at all! I restarted the server, and
everything went fine.
6. The first function that reads the attributes, and the one that failed,
is the following (please don't be intimidated by the long code... it's
all repetitive, just trying the different possible types):
template<typenameT>
ErrorCodeHDF5Attribute<T>::readAttribute(conststd::string&attribName,hid_tobj_id)
{
attribExists=H5Aexists(obj_id,attribName.c_str());
if(attribExists)
{
attribHandler=H5Aopen(obj_id,attribName.c_str(),H5P_DEFAULT);
attribType=H5Aget_type(attribHandler);
//intattribSize=H5Tget_size(attribType);
void*val;
//allpossibletypesareincludedfromhttps://www.hdfgroup.org/HDF5/doc/H5.user/Datatypes.html
if(H5Tequal(attribType,H5T_NATIVE_INT))
{
typedefintTypeToUse;
val=newTypeToUse;
readError=H5Aread(attribHandler,attribType,val);
value=static_cast<T>(*((TypeToUse*)val));
delete(TypeToUse*)val;
}
elseif(H5Tequal(attribType,H5T_NATIVE_UINT))
{
typedefunsignedintTypeToUse;
val=newTypeToUse;
readError=H5Aread(attribHandler,attribType,val);
value=static_cast<T>(*((TypeToUse*)val));
delete(TypeToUse*)val;
}
elseif(H5Tequal(attribType,H5T_NATIVE_SHORT))
{
typedefshortintTypeToUse;
val=newTypeToUse;
readError=H5Aread(attribHandler,attribType,val);
value=static_cast<T>(*((TypeToUse*)val));
delete(TypeToUse*)val;
}
elseif(H5Tequal(attribType,H5T_NATIVE_USHORT))
{
typedefunsignedshortintTypeToUse;
val=newTypeToUse;
readError=H5Aread(attribHandler,attribType,val);
value=static_cast<T>(*((TypeToUse*)val));
delete(TypeToUse*)val;
}
elseif(H5Tequal(attribType,H5T_NATIVE_LONG))
{
typedeflongintTypeToUse;
val=newTypeToUse;
readError=H5Aread(attribHandler,attribType,val);
value=static_cast<T>(*((TypeToUse*)val));
delete(TypeToUse*)val;
}
elseif(H5Tequal(attribType,H5T_NATIVE_ULONG))
{
typedefunsignedlongintTypeToUse;
val=newTypeToUse;
readError=H5Aread(attribHandler,attribType,val);
value=static_cast<T>(*((TypeToUse*)val));
delete(TypeToUse*)val;
}
elseif(H5Tequal(attribType,H5T_NATIVE_LLONG))
{
typedeflonglongintTypeToUse;
val=newTypeToUse;
readError=H5Aread(attribHandler,attribType,val);
value=static_cast<T>(*((TypeToUse*)val));
delete(TypeToUse*)val;
}
elseif(H5Tequal(attribType,H5T_NATIVE_ULLONG))
{
typedefunsignedlonglongintTypeToUse;
val=newTypeToUse;
readError=H5Aread(attribHandler,attribType,val);
value=static_cast<T>(*((TypeToUse*)val));
delete(TypeToUse*)val;
}
elseif(H5Tequal(attribType,H5T_NATIVE_FLOAT))
{
typedeffloatTypeToUse;
val=newTypeToUse;
readError=H5Aread(attribHandler,attribType,val);
value=static_cast<T>(*((TypeToUse*)val));
delete(TypeToUse*)val;
}
elseif(H5Tequal(attribType,H5T_NATIVE_DOUBLE))
{
typedefdoubleTypeToUse;
val=newTypeToUse;
readError=H5Aread(attribHandler,attribType,val);
value=static_cast<T>(*((TypeToUse*)val));
delete(TypeToUse*)val;
}
elseif(H5Tequal(attribType,H5T_NATIVE_LDOUBLE))
{
typedeflongdoubleTypeToUse;
val=newTypeToUse;
readError=H5Aread(attribHandler,attribType,val);
value=static_cast<T>(*((TypeToUse*)val));
delete(TypeToUse*)val;
}
elseif(H5Tequal(attribType,H5T_NATIVE_HBOOL))
{
typedeflongdoubleTypeToUse;
val=newTypeToUse;
readError=H5Aread(attribHandler,attribType,val);
value=static_cast<T>(*((TypeToUse*)val));
delete(TypeToUse*)val;
}
else
{
returnErrorCode(-114,"Unknown,unreadabledatatypeforattribute:"+attribName);
}
H5Aclose(attribHandler);
if(readError<0)
{
returnErrorCode(-115,"Anunknownerrorhappenedwhilereadingattribute:"+attribName);
}
else
{
returnErrorCode(0,"");
}
}
else
{
returnErrorCode(-112,"Error,attribute"+attribName+"doesnotexist.");
}
returnErrorCode(-113,"Error,controlreachedtheendofthefunctionreadAttribute()whenreadingattribute"+attribName+".Thisbehaviorisunexplainedaccordingtothealgorithmofreadingattributes.");
}
Color code (from Qt Creator):
Red: member variable
Purple: Class
Green: Comment or const char* string
Yellow: Built-in
Blue: def'ed Constant
Black: local variable or global function
Since I had a failure, I recompiled HDF5 1.8.17. The server is running
again and everything looks peaceful.
Would you have any recommendation to avoid this in the future? Does my
code have any obvious problem? I'm glad to share my whole class if that
helps.
Cheers,
Sam