Difference between revisions of "Hdf5 table"

From oldwiki.scinet.utoronto.ca
Jump to navigation Jump to search
Line 69: Line 69:
 
module load vacpp/12.1  xlf/14.1  mpich2/xl hdf5/189-v18-mpich2-xlc
 
module load vacpp/12.1  xlf/14.1  mpich2/xl hdf5/189-v18-mpich2-xlc
 
mpixlcxx -I$SCINET_HDF5_INC -L$SCINET_ZLIB_LIB -L$SCINET_SZIP_LIB -L$SCINET_HDF5_LIB Test.cpp -o Test -lhdf5_hl -lhdf5 -lsz -lz
 
mpixlcxx -I$SCINET_HDF5_INC -L$SCINET_ZLIB_LIB -L$SCINET_SZIP_LIB -L$SCINET_HDF5_LIB Test.cpp -o Test -lhdf5_hl -lhdf5 -lsz -lz
 +
</pre>
 +
Test.cpp :
 +
<pre>
 +
#include "hdf5.h"
 +
#include "hdf5_hl.h"
 +
#include <stdlib.h>
 +
#include <iostream>
 +
#include <stdint.h>
 +
#include <mpi.h>
 +
#define NFIELDS  (hsize_t)  7
 +
#define H5FILE_NAME    "tutorial1.h5"
 +
 +
int main(int argc, char *argv[])
 +
{
 +
  // DEF OF SIZE OF VARIABLES TO READ                                                                                                                       
 +
  typedef struct Particle
 +
  {
 +
    unsigned short int    ADCcount;
 +
    double energy;
 +
    int grid_i;
 +
    int grid_j;
 +
    long  idnumber;
 +
    char  name[16];
 +
    float pressure;
 +
  } Particle;
 +
 +
  /* Calculate the size and the offsets of our struct members in memory */
 +
  size_t dst_size =  sizeof( Particle );
 +
 +
  /* Calculate the size and the offsets of our struct members in memory */
 +
  size_t dst_size =  sizeof( Particle );
 +
 +
  size_t dst_offset[NFIELDS] = {
 +
    HOFFSET( Particle, ADCcount ),
 +
    HOFFSET( Particle, energy ),
 +
    HOFFSET( Particle, grid_i ),
 +
    HOFFSET( Particle, grid_j ),
 +
    HOFFSET( Particle, idnumber ),
 +
    HOFFSET( Particle, name ),
 +
    HOFFSET( Particle, pressure ),
 +
  };
 +
 +
  //////////////////////////////////////////////////////////////////////////////////////////////////////////////                                             
 +
  //MPI                                                                                                                                                     
 +
 +
  //HDF5 APIs definitions                                                                                                                                   
 +
  hid_t      file_id;        /* file and dataset identifiers */
 +
  hid_t plist_id;        /* property list identifier( access template) */
 +
  herr_t status;
 +
 +
  // MPI variables                                                                                                                                           
 +
  int mpi_size, mpi_rank;
 +
  MPI_Comm comm  = MPI_COMM_WORLD;
 +
  MPI_Info info  = MPI_INFO_NULL;
 +
 +
  //Initialize MPI                                                                                                                                           
 +
  MPI_Init(&argc, &argv);
 +
  MPI_Comm_size(comm, &mpi_size);
 +
  MPI_Comm_rank(comm, &mpi_rank);
 +
 +
  // Set up file access property list with parallel I/O access                                                                                               
 +
  plist_id = H5Pcreate(H5P_FILE_ACCESS);//creates a new property list as an instance of some property list class                                             
 +
  H5Pset_fapl_mpio(plist_id, comm, info);
 +
 +
  // Read file collectively.                                                                                                                                 
 +
  file_id = H5Fopen(H5FILE_NAME, H5F_ACC_RDONLY, plist_id);//H5F_ACC_RDONLY : read-only mode                                                                 
 +
 +
  Particle  dst_buf[1];
 +
  size_t dst_sizes[NFIELDS] = {
 +
    sizeof( dst_buf[0].ADCcount),
 +
    sizeof( dst_buf[0].energy),
 +
    sizeof( dst_buf[0].grid_i),
 +
    sizeof( dst_buf[0].grid_j),
 +
    sizeof( dst_buf[0].idnumber),
 +
    sizeof( dst_buf[0].name),
 +
    sizeof( dst_buf[0].pressure),
 +
  };
 +
 +
  //READ FRACTION OF TABLE : example reading one record per MPI process                                                                                     
 +
  hsize_t start=mpi_rank;//read Record number mpi_rank                                                                                                       
 +
  hsize_t nrecords=1;//read 1 record                                                                                                                         
 +
  status=H5TBread_records(file_id,"/detector/readout",start,nrecords,dst_size,dst_offset,dst_sizes,dst_buf);
 +
 +
  std::cout<<"Rank = "<<mpi_rank
 +
          <<" ,ADCcount = "<<dst_buf[0].ADCcount
 +
          <<" ,idnumber = "<<dst_buf[0].idnumber
 +
          <<" ,grid_i = "<<dst_buf[0].grid_i
 +
          <<" ,grid_j = "<<dst_buf[0].grid_j
 +
          <<" ,pressure = "<<dst_buf[0].pressure
 +
          <<" ,name = "<<dst_buf[0].name
 +
          <<" ,energy = "<<dst_buf[0].energy
 +
          <<std::endl;
 +
 +
 +
  //Close property list.                                                                                                                                     
 +
  H5Pclose(plist_id);
 +
 +
  // Close the file.                                                                                                                                         
 +
  H5Fclose(file_id);
 +
 +
  MPI_Finalize();
 +
 +
  return 1;
 +
}
 
</pre>
 
</pre>

Revision as of 12:27, 17 December 2013

Storing table in HDF5

The HDF5 Table interface condenses the steps needed to create tables in HDF5. The datatype of the dataset that gets created is of type H5T_COMPOUND. The members of the table can have different datatypes.


Writing a table using Python (PyTables)

PyTables is a package for managing hierarchical datasets, designed to cope efficiently and easily with extremely large amounts of data. PyTables is built on top of the HDF5 library, using the Python language and the NumPy package. The following example shows how to store a table of 10 records with 7 members:

name ADCcount grid_i grid_j pressure energy idnumber
16-character String Unsigned short integer 32-bit integer 32-bit integer float (single-precision) double (double-precision) Signed 64-bit integer

The script has been run on gpc with the following modules :

module load gcc/4.8.1  intel/14.0.0  python/2.7.2  hdf5/1811-v18-serial-gcc

PyTables 3.0.0 has been compiled in my scratch directory.

from tables import *

# Description of one record of the "readout" table: each class attribute
# declares one column and its storage type.
# NOTE(review): the C++ reader on this page lists its struct members in
# alphabetical order (ADCcount, energy, grid_i, ...); presumably PyTables
# stores the columns sorted by name when no position is given - confirm.
class Particle(IsDescription):
    name      = StringCol(16)   # 16-character string
    ADCcount  = UInt16Col()     # unsigned short integer
    grid_i    = Int32Col()      # 32-bit integer
    grid_j    = Int32Col()      # 32-bit integer
    pressure  = Float32Col()    # float  (single precision)
    energy    = Float64Col()    # double (double precision)
    idnumber  = Int64Col()      # signed 64-bit integer


# Create "tutorial1.h5" and fill /detector/readout with 10 Particle records.
h5file = open_file("tutorial1.h5", mode = "w", title = "Test file")
try:
    group = h5file.create_group("/", 'detector', 'Detector information')
    table = h5file.create_table(group, 'readout', Particle, "Readout example")

    # table.row is a reusable row buffer: fill the fields, then append().
    particle = table.row
    # range() instead of xrange() so the script also runs under Python 3.
    for i in range(10):
        # 'Particle: ' (10 chars) + '%6d' (6 chars) fills the 16-char column.
        particle['name']  = 'Particle: %6d' % (i)
        particle['ADCcount'] = (i * 256) % (1 << 16)
        particle['grid_i'] = i
        particle['grid_j'] = 10 - i
        particle['pressure'] = float(i*i)
        particle['energy'] = float(particle['pressure'] ** 4)
        particle['idnumber'] = i * (2 ** 34)
        # Insert a new particle record
        particle.append()

    # Write the buffered rows to the file before closing it.
    table.flush()
finally:
    # Always release the file handle, even if filling the table failed.
    h5file.close()

Reading the table with a C++ code with MPI for parallel programming

The following example shows how to read the table from an MPI program (each MPI process reads one individual record). The code has been compiled and tested on BlueGene with the following modules:

module load vacpp/12.1  xlf/14.1  mpich2/xl hdf5/189-v18-mpich2-xlc
mpixlcxx -I$SCINET_HDF5_INC -L$SCINET_ZLIB_LIB -L$SCINET_SZIP_LIB -L$SCINET_HDF5_LIB Test.cpp -o Test -lhdf5_hl -lhdf5 -lsz -lz

Test.cpp :

#include "hdf5.h"
#include "hdf5_hl.h"
#include <stdlib.h>
#include <iostream>
#include <stdint.h>
#include <mpi.h>
#define NFIELDS  (hsize_t)  7
#define H5FILE_NAME     "tutorial1.h5"

int main(int argc, char *argv[])
{
  // DEF OF SIZE OF VARIABLES TO READ                                                                                                                         
  typedef struct Particle
  {
    unsigned short int    ADCcount;
    double energy;
    int grid_i;
    int grid_j;
    long  idnumber;
    char   name[16];
    float pressure;
  } Particle;

  /* Calculate the size and the offsets of our struct members in memory */
  size_t dst_size =  sizeof( Particle );

  /* Calculate the size and the offsets of our struct members in memory */
  size_t dst_size =  sizeof( Particle );

  size_t dst_offset[NFIELDS] = {
    HOFFSET( Particle, ADCcount ),
    HOFFSET( Particle, energy ),
    HOFFSET( Particle, grid_i ),
    HOFFSET( Particle, grid_j ),
    HOFFSET( Particle, idnumber ),
    HOFFSET( Particle, name ),
    HOFFSET( Particle, pressure ),
  };

  //////////////////////////////////////////////////////////////////////////////////////////////////////////////                                              
  //MPI                                                                                                                                                       

  //HDF5 APIs definitions                                                                                                                                     
  hid_t       file_id;         /* file and dataset identifiers */
  hid_t plist_id;        /* property list identifier( access template) */
  herr_t status;

  // MPI variables                                                                                                                                            
  int mpi_size, mpi_rank;
  MPI_Comm comm  = MPI_COMM_WORLD;
  MPI_Info info  = MPI_INFO_NULL;

  //Initialize MPI                                                                                                                                            
  MPI_Init(&argc, &argv);
  MPI_Comm_size(comm, &mpi_size);
  MPI_Comm_rank(comm, &mpi_rank);

  // Set up file access property list with parallel I/O access                                                                                                
  plist_id = H5Pcreate(H5P_FILE_ACCESS);//creates a new property list as an instance of some property list class                                              
  H5Pset_fapl_mpio(plist_id, comm, info);

  // Read file collectively.                                                                                                                                  
  file_id = H5Fopen(H5FILE_NAME, H5F_ACC_RDONLY, plist_id);//H5F_ACC_RDONLY : read-only mode                                                                  

  Particle  dst_buf[1];
  size_t dst_sizes[NFIELDS] = {
    sizeof( dst_buf[0].ADCcount),
    sizeof( dst_buf[0].energy),
    sizeof( dst_buf[0].grid_i),
    sizeof( dst_buf[0].grid_j),
    sizeof( dst_buf[0].idnumber),
    sizeof( dst_buf[0].name),
    sizeof( dst_buf[0].pressure),
  };

  //READ FRACTION OF TABLE : example reading one record per MPI process                                                                                       
  hsize_t start=mpi_rank;//read Record number mpi_rank                                                                                                        
  hsize_t nrecords=1;//read 1 record                                                                                                                          
  status=H5TBread_records(file_id,"/detector/readout",start,nrecords,dst_size,dst_offset,dst_sizes,dst_buf);

  std::cout<<"Rank = "<<mpi_rank
           <<" ,ADCcount = "<<dst_buf[0].ADCcount
           <<" ,idnumber = "<<dst_buf[0].idnumber
           <<" ,grid_i = "<<dst_buf[0].grid_i
           <<" ,grid_j = "<<dst_buf[0].grid_j
           <<" ,pressure = "<<dst_buf[0].pressure
           <<" ,name = "<<dst_buf[0].name
           <<" ,energy = "<<dst_buf[0].energy
           <<std::endl;


  //Close property list.                                                                                                                                      
  H5Pclose(plist_id);

  // Close the file.                                                                                                                                          
  H5Fclose(file_id);

  MPI_Finalize();

  return 1;
}