hdf5 crashing when opening and closing files inside a loop

Hello,
I am new in hdf5, and I recently encountered a problem. I am using
fortran 90 and a fortran 90 hdf5 wrapper (
https://github.com/galtay/sphray/tree/master/hdf5_wrapper) to read and
write from many hdf5 files. The process of reading, modifying data and
writing is done inside a do loop.
I found that the code runs fine until a certain point, when it shows the
error message:

***Abort HDF5 : Unable to open HDF5 file in open_file()!

file name : /mnt/su3ctm/ggranda/cp_test2/rep_-1_-1_-2/ivol31/galaxies.hdf5

I have checked, and the file exists, so that is not the problem. I think
it is something connected with hdf5.

The code is the following:

    subroutine replications_translation(n_a,nsub,lbox,directory)
    ! subroutine to do the translations along all the the replications
    ! it makes use of the
    ! n_a: number of replications per axis
    ! nsub: number of subvolumes
    ! lbox: box size
    ! x: x coordinate
    ! y: y coordinate
    ! z: z coordinate
    ! directory: folder that contains the replications
    ! redshift: character that specifies the redshift (e.g. iz200)
        integer, intent(in) :: n_a,nsub
        real, dimension(:),allocatable :: x,y,z,dc,decl,ra
        real, intent(in) :: lbox
        character(*), intent(in) :: directory
        !character(5), intent(in) ::redshift
        character(2) :: temp_i,temp_j,temp_k,temp_subv
        integer :: i,j,k,l,ifile,dims(1),rank,count_l
        count_l=0
        do i=-n_a,n_a
            write(temp_i,"(I2)") i
            do j=-n_a,n_a
                write(temp_j,"(I2)") j
                do k=-n_a,n_a
                    write(temp_k,"(I2)") k
                    do l=30,nsub-1

                        write(temp_subv,"(I2)") l
                        call
hdf5_open_file(ifile,directory//'rep_'//trim(adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/galaxies.hdf5',readonly=.false.)

                        call
hdf5_get_dimensions(ifile,'Output001/mhalo',rank,dims)

allocate(x(dims(1)),y(dims(1)),z(dims(1)),dc(dims(1)),ra(dims(1)),decl(dims(1)))
                        call hdf5_read_data(ifile,'Output001/xgal',x)
                        call hdf5_read_data(ifile,'Output001/ygal',y)
                        call hdf5_read_data(ifile,'Output001/zgal',z)
                        x =x+i*lbox
                        y =y+j*lbox
                        z =z+k*lbox
                        dc =sqrt(x**2.0+y**2.0+z**2.0)
                        decl=asin(z/dc)
                        ra =atan2(y,x)

                        call
hdf5_write_data(ifile,'Output001/xgal_t',x,overwrite=.true.)
                        call
hdf5_write_attribute(ifile,'Output001/xgal_t/Comment','X(lightcone)
coordinate of this galaxy [Mpc/h]')

                        call
hdf5_write_data(ifile,'Output001/ygal_t',y,overwrite=.true.)
                        call
hdf5_write_attribute(ifile,'Output001/ygal_t/Comment','Y(lightcone)
coordinate of this galaxy [Mpc/h]')

                        call
hdf5_write_data(ifile,'Output001/zgal_t',z,overwrite=.true.)
                        call
hdf5_write_attribute(ifile,'Output001/zgal_t/Comment','Z(lightcone)
coordinate of this galaxy [Mpc/h]')

                        call
hdf5_write_data(ifile,'Output001/dc',dc,overwrite=.true.)
                        call
hdf5_write_attribute(ifile,'Output001/dc/Comment','Comoving distance
[Mpc/h]')
                        !print *, "check hdf5"
                        call
hdf5_write_data(ifile,'Output001/ra',ra,overwrite=.true.)
                        call
hdf5_write_attribute(ifile,'Output001/ra/Comment',"Right ascention")

                        call
hdf5_write_data(ifile,'Output001/decl',decl,overwrite=.true.)
                        call
hdf5_write_attribute(ifile,'Output001/decl/Comment',"Declination")

                        call hdf5_close_file(ifile)
                        print *, "Done with
"//directory//'rep_'//trim(adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/galaxies.hdf5'
                        deallocate(x,y,z,dc,ra,decl)
                        count_l=count_l+1
                        print *, "number =",count_l
                    enddo
               enddo
            enddo
        enddo

Could you please help me with that? The number of files that I need to
open, read, and write is tremendous. So, I don't know if this is a limitation
of hdf5, or if my code is written with poor practices and that is
causing the crash.

Thanks in advance,

···

--
Guido

Guido,

···

Am 26.02.2017 um 17:11 schrieb Guido granda muñoz <guidogranda@gmail.com<mailto:guidogranda@gmail.com>>:

Hello,
I am new in hdf5, and I recently encountered a problem. I am using fortran 90 and a fortran 90 hdf5 wrapper (https://github.com/galtay/sphray/tree/master/hdf5_wrapper) to read and write from many hdf5 files. The process of reading, modifying data and writing is done inside a do loop.
I found that the code runs fine until a certain point when It shows the error message:

***Abort HDF5 : Unable to open HDF5 file in open_file()!

file name : /mnt/su3ctm/ggranda/cp_test2/rep_-1_-1_-2/ivol31/galaxies.hdf5

I have checked out and the file exist, so that is not the problem. I think is something connected with hdf5.

The code is the following:

    subroutine replications_translation(n_a,nsub,lbox,directory)
    ! subroutine to do the translations along all the the replications
    ! it makes use of the
    ! n_a: number of replications per axis
    ! nsub: number of subvolumes
    ! lbox: box size
    ! x: x coordinate
    ! y: y coordinate
    ! z: z coordinate
    ! directory: folder that contains the replications
    ! redshift: character that specifies the redshift (e.g. iz200)
        integer, intent(in) :: n_a,nsub
        real, dimension(:),allocatable :: x,y,z,dc,decl,ra
        real, intent(in) :: lbox
        character(*), intent(in) :: directory
        !character(5), intent(in) ::redshift
        character(2) :: temp_i,temp_j,temp_k,temp_subv
        integer :: i,j,k,l,ifile,dims(1),rank,count_l
        count_l=0
        do i=-n_a,n_a
            write(temp_i,"(I2)") i
            do j=-n_a,n_a
                write(temp_j,"(I2)") j
                do k=-n_a,n_a
                    write(temp_k,"(I2)") k
                    do l=30,nsub-1

                        write(temp_subv,"(I2)") l
                        call hdf5_open_file(ifile,directory//'rep_'//trim(adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/galaxies.hdf5',readonly=.false.)

                        call hdf5_get_dimensions(ifile,'Output001/mhalo',rank,dims)
                        allocate(x(dims(1)),y(dims(1)),z(dims(1)),dc(dims(1)),ra(dims(1)),decl(dims(1)))
                        call hdf5_read_data(ifile,'Output001/xgal',x)
                        call hdf5_read_data(ifile,'Output001/ygal',y)
                        call hdf5_read_data(ifile,'Output001/zgal',z)
                        x =x+i*lbox
                        y =y+j*lbox
                        z =z+k*lbox
                        dc =sqrt(x**2.0+y**2.0+z**2.0)
                        decl=asin(z/dc)
                        ra =atan2(y,x)

                        call hdf5_write_data(ifile,'Output001/xgal_t',x,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/xgal_t/Comment','X(lightcone) coordinate of this galaxy [Mpc/h]')

                        call hdf5_write_data(ifile,'Output001/ygal_t',y,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/ygal_t/Comment','Y(lightcone) coordinate of this galaxy [Mpc/h]')

                        call hdf5_write_data(ifile,'Output001/zgal_t',z,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/zgal_t/Comment','Z(lightcone) coordinate of this galaxy [Mpc/h]')

                        call hdf5_write_data(ifile,'Output001/dc',dc,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/dc/Comment','Comoving distance [Mpc/h]')
                        !print *, "check hdf5"
                        call hdf5_write_data(ifile,'Output001/ra',ra,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/ra/Comment',"Right ascention")

                        call hdf5_write_data(ifile,'Output001/decl',decl,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/decl/Comment',"Declination")

                        call hdf5_close_file(ifile)
                        print *, "Done with "//directory//'rep_'//trim(adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/galaxies.hdf5'
                        deallocate(x,y,z,dc,ra,decl)
                        count_l=count_l+1
                        print *, "number =",count_l
                    enddo
               enddo
            enddo
        enddo

Could you please help me with that? The number of files that I need to open, read, write is tremendous. So, I dont know if that is a limitation for hdf5 or if my code is written with not good practices and that is causing the crash.

Thanks in advance,

Have you checked for resource leakage? HDF5 is special in that way that a close of a file does not necessarily release all handles
associated with the file. So, if you leave a handle to a dataset, attribute, data type, etc dangling for each file, after some time
space is exhausted and HDF5 will crash on you in a weird way. Been there, done that.

As you have not provided all your subroutines I cannot see if this is the case with your code.

Regards,

     Mark Koennecke

--
Guido
_______________________________________________
Hdf-forum is for HDF software users discussion.
Hdf-forum@lists.hdfgroup.org<mailto:Hdf-forum@lists.hdfgroup.org>
http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org
Twitter: https://twitter.com/hdf5

Out of curiosity, do you have the same issue if you comment out everything except the open and close routine?

Also, I’m not 100% sure why you are choosing this file layout/looping scheme. It seems that you are creating a directory structure for different files/datasets. Can you move this directory/file structure to inside the HDF5 file using groups? That way you can eliminate this file open/close in the middle of the nested loop. This is not just an HDF5 issue either; I don’t think you would want to do it this way for POSIX writes either if you want to get good I/O performance.

Scot

···

On Feb 26, 2017, at 10:11 AM, Guido granda muñoz <guidogranda@gmail.com<mailto:guidogranda@gmail.com>> wrote:

Hello,
I am new in hdf5, and I recently encountered a problem. I am using fortran 90 and a fortran 90 hdf5 wrapper (https://github.com/galtay/sphray/tree/master/hdf5_wrapper) to read and write from many hdf5 files. The process of reading, modifying data and writing is done inside a do loop.
I found that the code runs fine until a certain point when It shows the error message:

***Abort HDF5 : Unable to open HDF5 file in open_file()!

file name : /mnt/su3ctm/ggranda/cp_test2/rep_-1_-1_-2/ivol31/galaxies.hdf5

I have checked out and the file exist, so that is not the problem. I think is something connected with hdf5.

The code is the following:

    subroutine replications_translation(n_a,nsub,lbox,directory)
    ! subroutine to do the translations along all the the replications
    ! it makes use of the
    ! n_a: number of replications per axis
    ! nsub: number of subvolumes
    ! lbox: box size
    ! x: x coordinate
    ! y: y coordinate
    ! z: z coordinate
    ! directory: folder that contains the replications
    ! redshift: character that specifies the redshift (e.g. iz200)
        integer, intent(in) :: n_a,nsub
        real, dimension(:),allocatable :: x,y,z,dc,decl,ra
        real, intent(in) :: lbox
        character(*), intent(in) :: directory
        !character(5), intent(in) ::redshift
        character(2) :: temp_i,temp_j,temp_k,temp_subv
        integer :: i,j,k,l,ifile,dims(1),rank,count_l
        count_l=0
        do i=-n_a,n_a
            write(temp_i,"(I2)") i
            do j=-n_a,n_a
                write(temp_j,"(I2)") j
                do k=-n_a,n_a
                    write(temp_k,"(I2)") k
                    do l=30,nsub-1

                        write(temp_subv,"(I2)") l
                        call hdf5_open_file(ifile,directory//'rep_'//trim(adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/galaxies.hdf5',readonly=.false.)

                        call hdf5_get_dimensions(ifile,'Output001/mhalo',rank,dims)
                        allocate(x(dims(1)),y(dims(1)),z(dims(1)),dc(dims(1)),ra(dims(1)),decl(dims(1)))
                        call hdf5_read_data(ifile,'Output001/xgal',x)
                        call hdf5_read_data(ifile,'Output001/ygal',y)
                        call hdf5_read_data(ifile,'Output001/zgal',z)
                        x =x+i*lbox
                        y =y+j*lbox
                        z =z+k*lbox
                        dc =sqrt(x**2.0+y**2.0+z**2.0)
                        decl=asin(z/dc)
                        ra =atan2(y,x)

                        call hdf5_write_data(ifile,'Output001/xgal_t',x,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/xgal_t/Comment','X(lightcone) coordinate of this galaxy [Mpc/h]')

                        call hdf5_write_data(ifile,'Output001/ygal_t',y,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/ygal_t/Comment','Y(lightcone) coordinate of this galaxy [Mpc/h]')

                        call hdf5_write_data(ifile,'Output001/zgal_t',z,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/zgal_t/Comment','Z(lightcone) coordinate of this galaxy [Mpc/h]')

                        call hdf5_write_data(ifile,'Output001/dc',dc,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/dc/Comment','Comoving distance [Mpc/h]')
                        !print *, "check hdf5"
                        call hdf5_write_data(ifile,'Output001/ra',ra,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/ra/Comment',"Right ascention")

                        call hdf5_write_data(ifile,'Output001/decl',decl,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/decl/Comment',"Declination")

                        call hdf5_close_file(ifile)
                        print *, "Done with "//directory//'rep_'//trim(adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/galaxies.hdf5'
                        deallocate(x,y,z,dc,ra,decl)
                        count_l=count_l+1
                        print *, "number =",count_l
                    enddo
               enddo
            enddo
        enddo

Could you please help me with that? The number of files that I need to open, read, write is tremendous. So, I dont know if that is a limitation for hdf5 or if my code is written with not good practices and that is causing the crash.

Thanks in advance,

--
Guido
_______________________________________________
Hdf-forum is for HDF software users discussion.
Hdf-forum@lists.hdfgroup.org<mailto:Hdf-forum@lists.hdfgroup.org>
http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org
Twitter: https://twitter.com/hdf5

Hello,

To mark:

I haven't checked the resource leakage. Actually, I don't know how to do
that. Could you please give me a reference or explain to me how to do that?

From your message, what I understood is that using the same handle in a loop
can cause this problem — is that right?
I'm sending you my codes as attached files:
-lc_helper.f90 is where I have the subroutines
- light_cones.f90 is the principal code that calls the subroutines.
- Makefile

Thank you,

To Scot,

I commented out everything except the open and close statements, and I didn't
get the crash. Before doing that, the code crashed after loop iteration 1021. Do
you think that the only way to solve this issue is to avoid opening and
closing files in a loop? I am doing it this way because of how the data is given
to me.

Guido

lc_helper.f90 (16.6 KB)

light_cones.f90 (2.83 KB)

Makefile (1.11 KB)

···

2017-02-28 0:17 GMT+08:00 <hdf-forum-request@lists.hdfgroup.org>:

Send Hdf-forum mailing list submissions to
        hdf-forum@lists.hdfgroup.org

To subscribe or unsubscribe via the World Wide Web, visit
        http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_
lists.hdfgroup.org

or, via email, send a message with subject or body 'help' to
        hdf-forum-request@lists.hdfgroup.org

You can reach the person managing the list at
        hdf-forum-owner@lists.hdfgroup.org

When replying, please edit your Subject line so it is more specific
than "Re: Contents of Hdf-forum digest..."

Today's Topics:

   1. Re: hdf5 crashing when opening and closing files inside a
      loop (Koennecke Mark (PSI))
   2. Re: hdf5 crashing when opening and closing files inside a
      loop (Scot Breitenfeld)

----------------------------------------------------------------------

Message: 1
Date: Mon, 27 Feb 2017 15:39:14 +0000
From: "Koennecke Mark (PSI)" <mark.koennecke@psi.ch>
To: HDF Users Discussion List <hdf-forum@lists.hdfgroup.org>
Subject: Re: [Hdf-forum] hdf5 crashing when opening and closing files
        inside a loop
Message-ID: <DAF1B18F-2068-411A-AAFE-9688D13E9F84@psi.ch>
Content-Type: text/plain; charset="utf-8"

Guido,

Am 26.02.2017 um 17:11 schrieb Guido granda mu?oz <guidogranda@gmail.com
<mailto:guidogranda@gmail.com>>:

Hello,
I am new in hdf5, and I recently encountered a problem. I am using fortran
90 and a fortran 90 hdf5 wrapper (https://github.com/galtay/
sphray/tree/master/hdf5_wrapper) to read and write from many hdf5 files.
The process of reading, modifying data and writing is done inside a do loop.
I found that the code runs fine until a certain point when It shows the
error message:

***Abort HDF5 : Unable to open HDF5 file in open_file()!

file name : /mnt/su3ctm/ggranda/cp_test2/rep_-1_-1_-2/ivol31/galaxies.
hdf5

I have checked out and the file exist, so that is not the problem. I think
is something connected with hdf5.

The code is the following:

    subroutine replications_translation(n_a,nsub,lbox,directory)
    ! subroutine to do the translations along all the the replications
    ! it makes use of the
    ! n_a: number of replications per axis
    ! nsub: number of subvolumes
    ! lbox: box size
    ! x: x coordinate
    ! y: y coordinate
    ! z: z coordinate
    ! directory: folder that contains the replications
    ! redshift: character that specifies the redshift (e.g. iz200)
        integer, intent(in) :: n_a,nsub
        real, dimension(:),allocatable :: x,y,z,dc,decl,ra
        real, intent(in) :: lbox
        character(*), intent(in) :: directory
        !character(5), intent(in) ::redshift
        character(2) :: temp_i,temp_j,temp_k,temp_subv
        integer :: i,j,k,l,ifile,dims(1),rank,count_l
        count_l=0
        do i=-n_a,n_a
            write(temp_i,"(I2)") i
            do j=-n_a,n_a
                write(temp_j,"(I2)") j
                do k=-n_a,n_a
                    write(temp_k,"(I2)") k
                    do l=30,nsub-1

                        write(temp_subv,"(I2)") l
                        call hdf5_open_file(ifile,directory//'rep_'//trim(
adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(
adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/
galaxies.hdf5',readonly=.false.)

                        call hdf5_get_dimensions(ifile,'
Output001/mhalo',rank,dims)
                        allocate(x(dims(1)),y(dims(1))
,z(dims(1)),dc(dims(1)),ra(dims(1)),decl(dims(1)))
                        call hdf5_read_data(ifile,'Output001/xgal',x)
                        call hdf5_read_data(ifile,'Output001/ygal',y)
                        call hdf5_read_data(ifile,'Output001/zgal',z)
                        x =x+i*lbox
                        y =y+j*lbox
                        z =z+k*lbox
                        dc =sqrt(x**2.0+y**2.0+z**2.0)
                        decl=asin(z/dc)
                        ra =atan2(y,x)

                        call hdf5_write_data(ifile,'
Output001/xgal_t',x,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'
Output001/xgal_t/Comment','X(lightcone) coordinate of this galaxy
[Mpc/h]')

                        call hdf5_write_data(ifile,'
Output001/ygal_t',y,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'
Output001/ygal_t/Comment','Y(lightcone) coordinate of this galaxy
[Mpc/h]')

                        call hdf5_write_data(ifile,'
Output001/zgal_t',z,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'
Output001/zgal_t/Comment','Z(lightcone) coordinate of this galaxy
[Mpc/h]')

                        call hdf5_write_data(ifile,'
Output001/dc',dc,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'
Output001/dc/Comment','Comoving distance [Mpc/h]')
                        !print *, "check hdf5"
                        call hdf5_write_data(ifile,'
Output001/ra',ra,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/ra/Comment',"Right
ascention")

                        call hdf5_write_data(ifile,'Output001/decl',decl,
overwrite=.true.)
                        call hdf5_write_attribute(ifile,'
Output001/decl/Comment',"Declination")

                        call hdf5_close_file(ifile)
                        print *, "Done with "//directory//'rep_'//trim(
adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(
adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/galaxies.hdf5'
                        deallocate(x,y,z,dc,ra,decl)
                        count_l=count_l+1
                        print *, "number =",count_l
                    enddo
               enddo
            enddo
        enddo

Could you please help me with that? The number of files that I need to
open, read, write is tremendous. So, I dont know if that is a limitation
for hdf5 or if my code is written with not good practices and that is
causing the crash.

Thanks in advance,

Have you checked for resource leakage? HDF5 is special in that way that a
close of a file does not necessarily release all handles
associated with the file. So, if you leave a handle to a dataset,
attribute, data type, etc dangling for each file, after some time
space is exhausted and HDF5 will crash on you in a weird way. Been there,
done that.

As you have not provided all your subroutines I cannot see if this is the
case with your code.

Regards,

     Mark Koennecke

--
Guido
_______________________________________________
Hdf-forum is for HDF software users discussion.
Hdf-forum@lists.hdfgroup.org<mailto:Hdf-forum@lists.hdfgroup.org>
http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org
Twitter: https://twitter.com/hdf5

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.hdfgroup.org/pipermail/hdf-forum_lists.
hdfgroup.org/attachments/20170227/b5af7491/attachment-0001.html>

------------------------------

Message: 2
Date: Mon, 27 Feb 2017 16:16:48 +0000
From: Scot Breitenfeld <brtnfld@hdfgroup.org>
To: HDF Users Discussion List <hdf-forum@lists.hdfgroup.org>
Subject: Re: [Hdf-forum] hdf5 crashing when opening and closing files
        inside a loop
Message-ID: <A5AA56F9-3287-4B23-96D8-C273C11E4F13@hdfgroup.org>
Content-Type: text/plain; charset="utf-8"

Out of curiosity, do you have the same issue if you comment out everything
except the open and close routine?

Also, I?m not 100% sure why you are choosing this file layout/looping
scheme. It seems that you are creating a directory structure for different
files/datasets. Can you move this directory/file structure to inside the
HDF5 file using groups? That way you can eliminate this file open/close in
the middle of the nested loop. This is not just an HDF5 issue either; I
don?t think you would want to do it this way for POSIX writes either if you
want to get good I/O performance.

Scot

On Feb 26, 2017, at 10:11 AM, Guido granda mu?oz <guidogranda@gmail.com > <mailto:guidogranda@gmail.com>> wrote:

Hello,
I am new in hdf5, and I recently encountered a problem. I am using fortran
90 and a fortran 90 hdf5 wrapper (https://github.com/galtay/
sphray/tree/master/hdf5_wrapper) to read and write from many hdf5 files.
The process of reading, modifying data and writing is done inside a do loop.
I found that the code runs fine until a certain point when It shows the
error message:

***Abort HDF5 : Unable to open HDF5 file in open_file()!

file name : /mnt/su3ctm/ggranda/cp_test2/rep_-1_-1_-2/ivol31/galaxies.
hdf5

I have checked out and the file exist, so that is not the problem. I think
is something connected with hdf5.

The code is the following:

    subroutine replications_translation(n_a,nsub,lbox,directory)
    ! subroutine to do the translations along all the the replications
    ! it makes use of the
    ! n_a: number of replications per axis
    ! nsub: number of subvolumes
    ! lbox: box size
    ! x: x coordinate
    ! y: y coordinate
    ! z: z coordinate
    ! directory: folder that contains the replications
    ! redshift: character that specifies the redshift (e.g. iz200)
        integer, intent(in) :: n_a,nsub
        real, dimension(:),allocatable :: x,y,z,dc,decl,ra
        real, intent(in) :: lbox
        character(*), intent(in) :: directory
        !character(5), intent(in) ::redshift
        character(2) :: temp_i,temp_j,temp_k,temp_subv
        integer :: i,j,k,l,ifile,dims(1),rank,count_l
        count_l=0
        do i=-n_a,n_a
            write(temp_i,"(I2)") i
            do j=-n_a,n_a
                write(temp_j,"(I2)") j
                do k=-n_a,n_a
                    write(temp_k,"(I2)") k
                    do l=30,nsub-1

                        write(temp_subv,"(I2)") l
                        call hdf5_open_file(ifile,directory//'rep_'//trim(
adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(
adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/
galaxies.hdf5',readonly=.false.)

                        call hdf5_get_dimensions(ifile,'
Output001/mhalo',rank,dims)
                        allocate(x(dims(1)),y(dims(1))
,z(dims(1)),dc(dims(1)),ra(dims(1)),decl(dims(1)))
                        call hdf5_read_data(ifile,'Output001/xgal',x)
                        call hdf5_read_data(ifile,'Output001/ygal',y)
                        call hdf5_read_data(ifile,'Output001/zgal',z)
                        x =x+i*lbox
                        y =y+j*lbox
                        z =z+k*lbox
                        dc =sqrt(x**2.0+y**2.0+z**2.0)
                        decl=asin(z/dc)
                        ra =atan2(y,x)

                        call hdf5_write_data(ifile,'
Output001/xgal_t',x,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'
Output001/xgal_t/Comment','X(lightcone) coordinate of this galaxy
[Mpc/h]')

                        call hdf5_write_data(ifile,'
Output001/ygal_t',y,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'
Output001/ygal_t/Comment','Y(lightcone) coordinate of this galaxy
[Mpc/h]')

                        call hdf5_write_data(ifile,'
Output001/zgal_t',z,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'
Output001/zgal_t/Comment','Z(lightcone) coordinate of this galaxy
[Mpc/h]')

                        call hdf5_write_data(ifile,'
Output001/dc',dc,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'
Output001/dc/Comment','Comoving distance [Mpc/h]')
                        !print *, "check hdf5"
                        call hdf5_write_data(ifile,'
Output001/ra',ra,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/ra/Comment',"Right
ascention")

                        call hdf5_write_data(ifile,'Output001/decl',decl,
overwrite=.true.)
                        call hdf5_write_attribute(ifile,'
Output001/decl/Comment',"Declination")

                        call hdf5_close_file(ifile)
                        print *, "Done with "//directory//'rep_'//trim(
adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(
adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/galaxies.hdf5'
                        deallocate(x,y,z,dc,ra,decl)
                        count_l=count_l+1
                        print *, "number =",count_l
                    enddo
               enddo
            enddo
        enddo

Could you please help me with that? The number of files that I need to
open, read, write is tremendous. So, I dont know if that is a limitation
for hdf5 or if my code is written with not good practices and that is
causing the crash.

Thanks in advance,

--
Guido
_______________________________________________
Hdf-forum is for HDF software users discussion.
Hdf-forum@lists.hdfgroup.org<mailto:Hdf-forum@lists.hdfgroup.org>
http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org
Twitter: https://twitter.com/hdf5

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.hdfgroup.org/pipermail/hdf-forum_lists.
hdfgroup.org/attachments/20170227/ea6149be/attachment.html>

------------------------------

Subject: Digest Footer

_______________________________________________
Hdf-forum is for HDF software users discussion.
Hdf-forum@lists.hdfgroup.org
http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org

------------------------------

End of Hdf-forum Digest, Vol 92, Issue 25
*****************************************

--
Guido

Dear Guido,

···

Am 27.02.2017 um 18:15 schrieb Guido granda muñoz <guidogranda@gmail.com<mailto:guidogranda@gmail.com>>:

Hello,

To mark:

I haven't checked the resource leakage. Actually, I don't know how to do that. Could you please give me a reference or explain to me how to do that?
From your message what I understood is that using the same handle in a loop can cause that problem?
I'm sending you my codes as attached files:
-lc_helper.f90 is where I have the subroutines
- light_cones.f90 is the principal code that calls the subroutines.
- Makefile

Thank you,

There is a function which you can call just before H5Fclose() which tells you the number of objects which are still open.
This is h5fget_obj_count_f(), documented here: https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-GetObjCount

You may need to login to the HDF5 WWW-site to get there.

If you find a positive count then you have to debug that hdf5_wrapper library you are using.

Another option is to restore proper close semantics by setting the close degree to strong in the file creation property list with H5Pset_close_degree_f().
See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5P.html#Property-SetFcloseDegree

Regards,

     Mark Könnecke

To Scot,

I commented everything except the opening and close statements and I didn't get the crash. Before doing that the code crashed after the 1021 loop. Do you think that the only way to solve this issue is avoiding opening and closing file sin a loop? I am doing that due to the way the data is given to me.

Guido

2017-02-28 0:17 GMT+08:00 <hdf-forum-request@lists.hdfgroup.org<mailto:hdf-forum-request@lists.hdfgroup.org>>:
Send Hdf-forum mailing list submissions to
        hdf-forum@lists.hdfgroup.org<mailto:hdf-forum@lists.hdfgroup.org>

To subscribe or unsubscribe via the World Wide Web, visit
        http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org

or, via email, send a message with subject or body 'help' to
        hdf-forum-request@lists.hdfgroup.org<mailto:hdf-forum-request@lists.hdfgroup.org>

You can reach the person managing the list at
        hdf-forum-owner@lists.hdfgroup.org<mailto:hdf-forum-owner@lists.hdfgroup.org>

When replying, please edit your Subject line so it is more specific
than "Re: Contents of Hdf-forum digest..."

Today's Topics:

   1. Re: hdf5 crashing when opening and closing files inside a
      loop (Koennecke Mark (PSI))
   2. Re: hdf5 crashing when opening and closing files inside a
      loop (Scot Breitenfeld)

----------------------------------------------------------------------

Message: 1
Date: Mon, 27 Feb 2017 15:39:14 +0000
From: "Koennecke Mark (PSI)" <mark.koennecke@psi.ch<mailto:mark.koennecke@psi.ch>>
To: HDF Users Discussion List <hdf-forum@lists.hdfgroup.org<mailto:hdf-forum@lists.hdfgroup.org>>
Subject: Re: [Hdf-forum] hdf5 crashing when opening and closing files
        inside a loop
Message-ID: <DAF1B18F-2068-411A-AAFE-9688D13E9F84@psi.ch<mailto:DAF1B18F-2068-411A-AAFE-9688D13E9F84@psi.ch>>
Content-Type: text/plain; charset="utf-8"

Guido,

Am 26.02.2017 um 17:11 schrieb Guido granda muñoz <guidogranda@gmail.com<mailto:guidogranda@gmail.com><mailto:guidogranda@gmail.com<mailto:guidogranda@gmail.com>>>:

Hello,
I am new in hdf5, and I recently encountered a problem. I am using fortran 90 and a fortran 90 hdf5 wrapper (https://github.com/galtay/sphray/tree/master/hdf5_wrapper) to read and write from many hdf5 files. The process of reading, modifying data and writing is done inside a do loop.
I found that the code runs fine until a certain point when it shows the error message:

***Abort HDF5 : Unable to open HDF5 file in open_file()!

file name : /mnt/su3ctm/ggranda/cp_test2/rep_-1_-1_-2/ivol31/galaxies.hdf5

I have checked out and the file exist, so that is not the problem. I think is something connected with hdf5.

The code is the following:

    subroutine replications_translation(n_a,nsub,lbox,directory)
    ! subroutine to do the translations along all the the replications
    ! it makes use of the
    ! n_a: number of replications per axis
    ! nsub: number of subvolumes
    ! lbox: box size
    ! x: x coordinate
    ! y: y coordinate
    ! z: z coordinate
    ! directory: folder that contains the replications
    ! redshift: character that specifies the redshift (e.g. iz200)
        integer, intent(in) :: n_a,nsub
        real, dimension(:),allocatable :: x,y,z,dc,decl,ra
        real, intent(in) :: lbox
        character(*), intent(in) :: directory
        !character(5), intent(in) ::redshift
        character(2) :: temp_i,temp_j,temp_k,temp_subv
        integer :: i,j,k,l,ifile,dims(1),rank,count_l
        count_l=0
        do i=-n_a,n_a
            write(temp_i,"(I2)") i
            do j=-n_a,n_a
                write(temp_j,"(I2)") j
                do k=-n_a,n_a
                    write(temp_k,"(I2)") k
                    do l=30,nsub-1

                        write(temp_subv,"(I2)") l
                        call hdf5_open_file(ifile,directory//'rep_'//trim(adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/galaxies.hdf5',readonly=.false.)

                        call hdf5_get_dimensions(ifile,'Output001/mhalo',rank,dims)
                        allocate(x(dims(1)),y(dims(1)),z(dims(1)),dc(dims(1)),ra(dims(1)),decl(dims(1)))
                        call hdf5_read_data(ifile,'Output001/xgal',x)
                        call hdf5_read_data(ifile,'Output001/ygal',y)
                        call hdf5_read_data(ifile,'Output001/zgal',z)
                        x =x+i*lbox
                        y =y+j*lbox
                        z =z+k*lbox
                        dc =sqrt(x**2.0+y**2.0+z**2.0)
                        decl=asin(z/dc)
                        ra =atan2(y,x)

                        call hdf5_write_data(ifile,'Output001/xgal_t',x,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/xgal_t/Comment','X(lightcone) coordinate of this galaxy [Mpc/h]')

                        call hdf5_write_data(ifile,'Output001/ygal_t',y,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/ygal_t/Comment','Y(lightcone) coordinate of this galaxy [Mpc/h]')

                        call hdf5_write_data(ifile,'Output001/zgal_t',z,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/zgal_t/Comment','Z(lightcone) coordinate of this galaxy [Mpc/h]')

                        call hdf5_write_data(ifile,'Output001/dc',dc,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/dc/Comment','Comoving distance [Mpc/h]')
                        !print *, "check hdf5"
                        call hdf5_write_data(ifile,'Output001/ra',ra,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/ra/Comment',"Right ascention")

                        call hdf5_write_data(ifile,'Output001/decl',decl,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/decl/Comment',"Declination")

                        call hdf5_close_file(ifile)
                        print *, "Done with "//directory//'rep_'//trim(adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/galaxies.hdf5'
                        deallocate(x,y,z,dc,ra,decl)
                        count_l=count_l+1
                        print *, "number =",count_l
                    enddo
               enddo
            enddo
        enddo

Could you please help me with that? The number of files that I need to open, read, and write is tremendous. So, I don't know if that is a limitation of hdf5 or if my code is written with poor practices and that is causing the crash.

Thanks in advance,

Have you checked for resource leakage? HDF5 is special in that way that a close of a file does not necessarily release all handles
associated with the file. So, if you leave a handle to a dataset, attribute, data type, etc dangling for each file, after some time
space is exhausted and HDF5 will crash on you in a weird way. Been there, done that.

As you have not provided all your subroutines I cannot see if this is the case with your code.

Regards,

     Mark Koennecke

--
Guido
_______________________________________________
Hdf-forum is for HDF software users discussion.
Hdf-forum@lists.hdfgroup.org<mailto:Hdf-forum@lists.hdfgroup.org><mailto:Hdf-forum@lists.hdfgroup.org<mailto:Hdf-forum@lists.hdfgroup.org>>
http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org
Twitter: https://twitter.com/hdf5

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.hdfgroup.org/pipermail/hdf-forum_lists.hdfgroup.org/attachments/20170227/b5af7491/attachment-0001.html>

------------------------------

Message: 2
Date: Mon, 27 Feb 2017 16:16:48 +0000
From: Scot Breitenfeld <brtnfld@hdfgroup.org<mailto:brtnfld@hdfgroup.org>>
To: HDF Users Discussion List <hdf-forum@lists.hdfgroup.org<mailto:hdf-forum@lists.hdfgroup.org>>
Subject: Re: [Hdf-forum] hdf5 crashing when opening and closing files
        inside a loop
Message-ID: <A5AA56F9-3287-4B23-96D8-C273C11E4F13@hdfgroup.org<mailto:A5AA56F9-3287-4B23-96D8-C273C11E4F13@hdfgroup.org>>
Content-Type: text/plain; charset="utf-8"

Out of curiosity, do you have the same issue if you comment out everything except the open and close routine?

Also, I'm not 100% sure why you are choosing this file layout/looping scheme. It seems that you are creating a directory structure for different files/datasets. Can you move this directory/file structure to inside the HDF5 file using groups? That way you can eliminate this file open/close in the middle of the nested loop. This is not just an HDF5 issue either; I don't think you would want to do it this way for POSIX writes either if you want to get good I/O performance.

Scot

On Feb 26, 2017, at 10:11 AM, Guido granda muñoz <guidogranda@gmail.com<mailto:guidogranda@gmail.com><mailto:guidogranda@gmail.com<mailto:guidogranda@gmail.com>>> wrote:

Hello,
I am new in hdf5, and I recently encountered a problem. I am using fortran 90 and a fortran 90 hdf5 wrapper (https://github.com/galtay/sphray/tree/master/hdf5_wrapper) to read and write from many hdf5 files. The process of reading, modifying data and writing is done inside a do loop.
I found that the code runs fine until a certain point when it shows the error message:

***Abort HDF5 : Unable to open HDF5 file in open_file()!

file name : /mnt/su3ctm/ggranda/cp_test2/rep_-1_-1_-2/ivol31/galaxies.hdf5

I have checked out and the file exist, so that is not the problem. I think is something connected with hdf5.

The code is the following:

    subroutine replications_translation(n_a,nsub,lbox,directory)
    ! subroutine to do the translations along all the the replications
    ! it makes use of the
    ! n_a: number of replications per axis
    ! nsub: number of subvolumes
    ! lbox: box size
    ! x: x coordinate
    ! y: y coordinate
    ! z: z coordinate
    ! directory: folder that contains the replications
    ! redshift: character that specifies the redshift (e.g. iz200)
        integer, intent(in) :: n_a,nsub
        real, dimension(:),allocatable :: x,y,z,dc,decl,ra
        real, intent(in) :: lbox
        character(*), intent(in) :: directory
        !character(5), intent(in) ::redshift
        character(2) :: temp_i,temp_j,temp_k,temp_subv
        integer :: i,j,k,l,ifile,dims(1),rank,count_l
        count_l=0
        do i=-n_a,n_a
            write(temp_i,"(I2)") i
            do j=-n_a,n_a
                write(temp_j,"(I2)") j
                do k=-n_a,n_a
                    write(temp_k,"(I2)") k
                    do l=30,nsub-1

                        write(temp_subv,"(I2)") l
                        call hdf5_open_file(ifile,directory//'rep_'//trim(adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/galaxies.hdf5',readonly=.false.)

                        call hdf5_get_dimensions(ifile,'Output001/mhalo',rank,dims)
                        allocate(x(dims(1)),y(dims(1)),z(dims(1)),dc(dims(1)),ra(dims(1)),decl(dims(1)))
                        call hdf5_read_data(ifile,'Output001/xgal',x)
                        call hdf5_read_data(ifile,'Output001/ygal',y)
                        call hdf5_read_data(ifile,'Output001/zgal',z)
                        x =x+i*lbox
                        y =y+j*lbox
                        z =z+k*lbox
                        dc =sqrt(x**2.0+y**2.0+z**2.0)
                        decl=asin(z/dc)
                        ra =atan2(y,x)

                        call hdf5_write_data(ifile,'Output001/xgal_t',x,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/xgal_t/Comment','X(lightcone) coordinate of this galaxy [Mpc/h]')

                        call hdf5_write_data(ifile,'Output001/ygal_t',y,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/ygal_t/Comment','Y(lightcone) coordinate of this galaxy [Mpc/h]')

                        call hdf5_write_data(ifile,'Output001/zgal_t',z,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/zgal_t/Comment','Z(lightcone) coordinate of this galaxy [Mpc/h]')

                        call hdf5_write_data(ifile,'Output001/dc',dc,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/dc/Comment','Comoving distance [Mpc/h]')
                        !print *, "check hdf5"
                        call hdf5_write_data(ifile,'Output001/ra',ra,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/ra/Comment',"Right ascention")

                        call hdf5_write_data(ifile,'Output001/decl',decl,overwrite=.true.)
                        call hdf5_write_attribute(ifile,'Output001/decl/Comment',"Declination")

                        call hdf5_close_file(ifile)
                        print *, "Done with "//directory//'rep_'//trim(adjustl(temp_i))//'_'//trim(adjustl(temp_j))//'_'//trim(adjustl(temp_k))//'/ivol'//trim(adjustl(temp_subv))//'/galaxies.hdf5'
                        deallocate(x,y,z,dc,ra,decl)
                        count_l=count_l+1
                        print *, "number =",count_l
                    enddo
               enddo
            enddo
        enddo

Could you please help me with that? The number of files that I need to open, read, and write is tremendous. So, I don't know if that is a limitation of hdf5 or if my code is written with poor practices and that is causing the crash.

Thanks in advance,

--
Guido
_______________________________________________
Hdf-forum is for HDF software users discussion.
Hdf-forum@lists.hdfgroup.org<mailto:Hdf-forum@lists.hdfgroup.org><mailto:Hdf-forum@lists.hdfgroup.org<mailto:Hdf-forum@lists.hdfgroup.org>>
http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org
Twitter: https://twitter.com/hdf5

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.hdfgroup.org/pipermail/hdf-forum_lists.hdfgroup.org/attachments/20170227/ea6149be/attachment.html>

------------------------------

Subject: Digest Footer

_______________________________________________
Hdf-forum is for HDF software users discussion.
Hdf-forum@lists.hdfgroup.org<mailto:Hdf-forum@lists.hdfgroup.org>
http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org

------------------------------

End of Hdf-forum Digest, Vol 92, Issue 25
*****************************************

--
Guido
<lc_helper.f90><light_cones.f90><Makefile>_______________________________________________
Hdf-forum is for HDF software users discussion.
Hdf-forum@lists.hdfgroup.org<mailto:Hdf-forum@lists.hdfgroup.org>
http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org
Twitter: https://twitter.com/hdf5