MUQ  0.4.3
HDF5File.h
Go to the documentation of this file.
1 #ifndef HDF5FILE_H_
2 #define HDF5FILE_H_
3 
4 #include <Eigen/Core>
5 
8 
9 #include <vector>
10 #include <memory>
11 
12 #include <hdf5.h>
13 #include <hdf5_hl.h>
14 
15 
16 namespace muq
17 {
18 
19 namespace Utilities
20 {
21 
23  class HDF5File : public std::enable_shared_from_this<HDF5File> {
24  public:
25 
27 
33  HDF5File(std::string const& filename_);
34 
36  virtual ~HDF5File();
37 
39 
44  void Open(std::string const& filename_);
45 
47 
50  void Close();
51 
53 
56  void Copy(std::string const& destName, std::shared_ptr<HDF5File> srcFile, std::string const& srcName);
57 
59 
69  template<typename scalarType, int fixedRows, int fixedCols>
70  void WriteMatrix(std::string name, Eigen::Matrix<scalarType, fixedRows, fixedCols> const& dataset) {
71 
72  // data set name must begin with '/'
73  if( name.at(0)!='/' ) {
74  std::cerr << std::endl << "ERROR: Paths in the HDF5 file must start with a forward slash (/)" << std::endl << "\tHDF5File::WriteMatrix(std::string const&, Eigen::Matrix<scalarType, fixedRows, fixedCols> const&)" << std::endl << std::endl;
75  assert(name.at(0) == '/');
76  }
77 
78  if((name.at(0)=='/')&&(name.size()>0)){
79  if(name.at(1)=='/'){
80  name.erase(name.begin());
81  }
82  }
83 
84  // make sure the file is open
85  assert(fileID>0);
86 
87  // set the maximum size of the data set to be unlimited
88  const hsize_t maxdim[2] = {H5S_UNLIMITED, H5S_UNLIMITED};
89 
90  const hsize_t dimsf[2] = {(hsize_t)dataset.rows(), (hsize_t)dataset.cols()};
91 
92  // create the dataspace for the dataset.
93  const hid_t filespace = H5Screate_simple(2, dimsf, maxdim);
94  assert(filespace>0);
95 
96  // the dataset we are writing to
97  hid_t dataID;
98  if( DoesDataSetExist(name) ) {
99 
100  // open the data
101  dataID = H5Dopen(fileID, name.c_str(), H5P_DEFAULT);
102 
103  // get the dataspace dimension size
104  hsize_t dims[2];
105  H5Sget_simple_extent_dims(H5Dget_space(dataID), dims, nullptr);
106 
107  if( (dimsf[0]!=dims[0]) || (dimsf[1]!=dims[1]) ) { // if the data size changes ...
108  // Extend the dataset
109  H5Dset_extent(dataID, dimsf);
110  }
111 
112  } else {
113 
114  // get the parent path
115  std::string parentPath = GetParentPath(name);
116 
117  // make sure the parent exists (and create it if it does not)
118  CreateGroup(parentPath);
119 
120  // modify data set creation properties --- enable chunking
121  const hid_t prop = H5Pcreate(H5P_DATASET_CREATE);
122 
123  // set chunk size to be (1,1) -- may be a bad idea of the data set is extremely large
124  hsize_t chunk[2] = {100,100};
125  if(fixedRows>0){
126  chunk[0] = std::min<hsize_t>(chunk[0],fixedRows);
127  }
128 
129  if(fixedCols>0){
130  chunk[1] = std::min<hsize_t>(chunk[1],fixedRows);
131  }
132  H5Pset_chunk(prop, 2, chunk);
133 
134  // create a dataset for each process
135  dataID = H5Dcreate(fileID, // Location identifier
136  name.c_str(), // Dataset name
137  HDF5_Type<scalarType>::GetFlag(), // Datatype identifier
138  filespace, // Dataspace identifier
139  H5P_DEFAULT, // Link creation property list
140  prop, // Dataset creation property list
141  H5P_DEFAULT); // Dataset access property list
142 
143  // close the creation properties
144  H5Pclose(prop);
145  }
146 
147  // constant reference (avoid copying) to the data transpose because of column versus row major conventions
148  Eigen::Matrix<scalarType, fixedRows, fixedCols> dataset_ = dataset.transpose();
149 
150  // write the data to file
151  H5Dwrite(dataID, HDF5_Type<scalarType>::GetFlag(), H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_.data());
152 
153  // close the filespace
154  H5Sclose(filespace);
155 
156  // close the dataset
157  H5Dclose(dataID);
158 
159  }
160 
162 
170  template<typename scalarType, int fixedRows, int fixedCols>
171  void WritePartialMatrix(std::string const& name,
172  Eigen::Matrix<scalarType, fixedRows, fixedCols> const& data,
173  unsigned int const row, unsigned int const col)
174  {
175  // data set name must begin with '/'
176  if( name.at(0)!='/' ) {
177  std::cerr << std::endl << "ERROR: Paths in the HDF5 file must start with a forward slash (/)" << std::endl << "\tHDF5File::WriteMatrix(std::string const&, Eigen::Matrix<scalarType, fixedRows, fixedCols> const&)" << std::endl << std::endl;
178  assert(name.at(0) == '/');
179  }
180 
181  // make sure the file is open
182  assert(fileID>0);
183 
184  // how many elements to move in each dimension
185  const hsize_t stride[2] = {1, 1};
186 
187  // how many blocks to move in each dimension
188  const hsize_t count[2] = {1, 1};
189 
190  // make sure the data set exists
191  if( !DoesDataSetExist(name) ) {
192  std::cerr << std::endl << "ERROR: Dataset " << name << " does not exsts." << std::endl << std::endl;
193  assert(DoesDataSetExist(name));
194  }
195 
196  // open the data set
197  const hid_t dataID = H5Dopen(fileID, name.c_str(), H5P_DEFAULT);
198 
199  // get the space in the file
200  const hid_t fullspace = H5Dget_space(dataID);
201 
202  // the location in the dataset where we start writing
203  const hsize_t start[2] = {row, col};
204 
205  // the size of each block
206  const hsize_t block[2] = {(hsize_t)data.rows(), (hsize_t)data.cols()};
207 
208  // get the hyperslab (the subspace needed for the partial write)
209  H5Sselect_hyperslab(fullspace, H5S_SELECT_SET, start, stride, count, block);
210 
211  // get the dataspace for the partial write
212  const hsize_t limitedBlock = H5Screate_simple(2, block, block);
213 
214  // constant reference (prevent copying) to transpose data
215  const Eigen::Matrix<scalarType, fixedRows, fixedCols>& data_ = data.transpose();
216 
217  // write to file
218  auto write = H5Dwrite(dataID,
220  limitedBlock,
221  fullspace,
222  H5P_DEFAULT,
223  data_.data());
224 
225  // close the spaces
226  H5Sclose(limitedBlock);
227  H5Sclose(fullspace);
228  H5Dclose(dataID);
229  }
230 
232 
238  template<typename scalarType = double, int fixedRows = Eigen::Dynamic, int fixedCols = Eigen::Dynamic>
239  Eigen::Matrix<scalarType, fixedRows, fixedCols> ReadMatrix(std::string const& name) const {
240 
241  // make sure the file is open
242  assert(fileID>0);
243 
244  if( !DoesDataSetExist(name) ) { // if the data does not exist
245  // ... return an empty matrix.
246  return Eigen::Matrix<scalarType, fixedRows, fixedCols>();
247  }
248 
249  // get the dataset ID
250  const hid_t dataset = H5Dopen2(hid_t(fileID), name.c_str(), H5P_DEFAULT);
251 
252  // get the space associated with this data set
253  const hid_t filespace = H5Dget_space(dataset);
254 
255  const int ndims = H5Sget_simple_extent_ndims(filespace);
256 
257  // get the size of the dataset
258  std::vector<hsize_t> dimsr(ndims);
259  H5Sget_simple_extent_dims(filespace, dimsr.data(), nullptr);
260 
261  // declare the matrix that we will read into
262  Eigen::Matrix<scalarType, Eigen::Dynamic, Eigen::Dynamic> pt;
263 
264  if( ndims==2 ) { // if it is a matrix ...
265  pt.resize(dimsr[1], dimsr[0]);
266  } else { // if it is a vector ...
267  if(fixedRows==1){
268  pt = Eigen::Matrix<scalarType, 1, fixedCols>(dimsr[0]);
269  }else{
270  pt = Eigen::Matrix<scalarType, fixedRows, 1>(dimsr[0]);
271  }
272 
273  }
274 
275  // get the property list
276  const hid_t prop = H5Dget_create_plist(dataset);
277 
278  // get the memory for this data set
279  const hid_t memspace = H5Screate_simple(ndims, dimsr.data(), nullptr);
280 
281  // read the data
282  H5Dread(dataset, HDF5_Type<scalarType>::GetFlag(), memspace, filespace, H5P_DEFAULT, pt.data());
283 
284  // close the creation properties
285  H5Pclose(prop);
286 
287  // close the filespace
288  H5Sclose(filespace);
289 
290  // close the memory space
291  H5Sclose(memspace);
292 
293  // close the dataset
294  H5Dclose(dataset);
295 
296  if(ndims==2) {
297  // hdf5 and eigen have different memory convention so we read in the matrix transpose
298  if(fixedRows==1){
299  return pt;
300  }else{
301  return pt.transpose();
302  }
303  }
304 
305  // return the data
306  return pt;
307  }
308 
309 
310  template<typename scalarType=double, int fixedRows = Eigen::Dynamic, int fixedCols = Eigen::Dynamic>
311  Eigen::Matrix<scalarType, fixedRows, fixedCols> ReadPartialMatrix(std::string const& name,
312  int rowStart,
313  int colStart,
314  int numRows,
315  int numCols) const
316  {
317 
318 
319  // make sure the file is open
320  assert(fileID>0);
321 
322  if( !DoesDataSetExist(name) ) { // if the data does not exist
323  // ... return an empty matrix.
324  return Eigen::Matrix<scalarType, fixedRows, fixedCols>();
325  }
326 
327  // get the dataset ID
328  const hid_t dataset = H5Dopen2(fileID, name.c_str(), H5P_DEFAULT);
329 
330  // get the space associated with this data set
331  const hid_t dataspace = H5Dget_space(dataset);
332 
333  // get the size of the dataset
334  hsize_t dimsr[2];
335  H5Sget_simple_extent_dims(dataspace, dimsr, nullptr);
336 
337  // declare the matrix that we will read into
338  Eigen::Matrix<scalarType, fixedCols, fixedRows> pt;
339 
340  pt.resize(numCols,numRows); //<- will flip to proper orientation below
341  if( dimsr[1]>0 ) { // if it is a matrix ...
342  assert(numRows<=dimsr[0]);
343  assert(numCols<=dimsr[1]);
344  } else { // if it is a vector ...
345  assert(numRows<dimsr[0]);
346  }
347 
348  const hsize_t start[2] = {(hsize_t) rowStart, (hsize_t) colStart};
349  const hsize_t count[2] = {(hsize_t) numRows, (hsize_t) numCols};
350 
351  herr_t status = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, start, NULL, count, NULL);
352  // get the memory for this data set
353  dimsr[0] = numRows;
354  dimsr[1] = numCols;
355  hid_t memspace = H5Screate_simple(2, dimsr, nullptr);
356 
357  // read the data
358  H5Dread(dataset, HDF5_Type<scalarType>::GetFlag(), memspace, dataspace, H5P_DEFAULT, pt.data());
359 
360  // close the filespace
361  H5Sclose(dataspace);
362 
363  // close the memory space
364  H5Sclose(memspace);
365 
366  // close the dataset
367  H5Dclose(dataset);
368 
369  if( dimsr[1]>0 ) {
370  // hdf5 and eigen have different memory convention so we read in the matrix transpose
371  return pt.transpose();
372  }
373 
374  // return the data
375  return pt;
376 
377  }
378 
380 
384  bool DoesGroupExist(std::string const& name) const;
385 
387 
391  bool DoesDataSetExist(std::string const& name) const;
392 
393  bool IsDataSet(std::string const& name) const;
394  bool IsGroup(std::string const& name) const;
395 
397 
401  Eigen::VectorXi GetDataSetSize(std::string const name) const;
402 
404 
407  void CreateGroup(std::string const& name);
408 
409 
410 
412 
418  template<typename scalarType>
419  void CreateDataset(std::string const& name, int rows, int cols)
420  {
421 
422  // make sure the file is open
423  assert(fileID>0);
424 
425  // set the maximum size of the data set to be unlimited
426  const hsize_t maxdim[2] = {H5S_UNLIMITED, H5S_UNLIMITED};
427 
428  // the sizes of the data set
429  const hsize_t dimsf[2] = {(hsize_t)rows, (hsize_t)cols};
430 
431  // create the dataspace for the dataset.
432  const hid_t filespace = H5Screate_simple(2, dimsf, maxdim);
433  assert(filespace>0);
434 
435  // the dataset we are writing to
436  hid_t dataID;
437 
438  if( DoesDataSetExist(name) ) {
439 
440  // open the data
441  dataID = H5Dopen(fileID, name.c_str(), H5P_DEFAULT);
442 
443  // get the dataspace dimension size
444  hsize_t dims[2];
445  H5Sget_simple_extent_dims(H5Dget_space(dataID), dims, nullptr);
446 
447  if( (dimsf[0]!=dims[0]) || (dimsf[1]!=dims[1]) ) { // if the data size changes ...
448  // Extend the dataset
449  H5Dset_extent(dataID, dimsf);
450  }
451 
452  } else {
453 
454  // get the parent path
455  std::string parentPath = GetParentPath(name);
456 
457  // make sure the parent exists (and create it if it does not)
458  CreateGroup(parentPath);
459 
460  // modify data set creation properties --- enable chunking
461  const hid_t prop = H5Pcreate(H5P_DATASET_CREATE);
462 
463  // set chunk size to be (1,1) -- may be a bad idea of the data set is extremely large
464  hsize_t chunk[2] = {std::min<hsize_t>(100,rows), std::min<hsize_t>(100,cols)};
465  H5Pset_chunk(prop, 2, chunk);
466 
467  // create a dataset for each process
468  dataID = H5Dcreate(fileID, name.c_str(), HDF5_Type<scalarType>::GetFlag(), filespace, H5P_DEFAULT, prop, H5P_DEFAULT);
469  assert(dataID>0);
470 
471  // close the creation properties
472  H5Pclose(prop);
473  }
474 
475  // close the filespace
476  H5Sclose(filespace);
477 
478  // close the dataset
479  H5Dclose(dataID);
480  }
481 
483 
490  template<typename scalarType, int fixedRows>
491  void WriteVectorAttribute(std::string const& datasetName,
492  std::string const& attributeName,
493  Eigen::Matrix<scalarType, fixedRows, 1> const& attribute)
494  {
495 
496  // make sure the file is open
497  assert(fileID>0);
498 
499  // Create the group if necessary
500  if( !DoesDataSetExist(datasetName) || !DoesGroupExist(datasetName) )
501  CreateGroup(datasetName);
502 
503  // set the attribute
504  const herr_t set_result = H5LTset_attribute(fileID,
505  datasetName.c_str(),
506  attributeName.c_str(),
508  (void*) attribute.data(),
509  attribute.rows());
510  assert(set_result>=0);
511  }
512 
514 
521  template<typename attType = double>
522  void WriteScalarAttribute(std::string const& datasetName,
523  std::string const& attributeName,
524  attType const& attribute)
525  {
526  // make the scalar a vector
527  Eigen::Matrix<attType, 1, 1> vec(1);
528  vec << attribute;
529 
530  // call the vector attribute version
531  WriteVectorAttribute<attType, 1>(datasetName, attributeName, vec);
532  }
533 
535 
542  void WriteStringAttribute(std::string const& datasetName,
543  std::string const& attributeName,
544  std::string const& attribute);
545 
547 
551  template<typename scalarType>
552  scalarType GetScalarAttribute(std::string const& datasetName,
553  std::string const& attributeName) const {
554 
555  // read as a vector attribute
556  Eigen::Matrix<scalarType, Eigen::Dynamic, 1> att = GetVectorAttribute<scalarType>(datasetName, attributeName);
557  assert(att.size() == 1);
558 
559  // return the sclar
560  return att(0);
561  }
562 
564 
568  template<typename scalarType>
569  Eigen::Matrix<scalarType, Eigen::Dynamic, 1> GetVectorAttribute(std::string const& datasetName,
570  std::string const& attributeName) const
571  {
572  // make sure the file is open
573  assert(fileID>0);
574 
575  // make sure the dataset exists
576  assert(DoesDataSetExist(datasetName) || DoesGroupExist(datasetName));
577 
578  // get the dimensions of the attribute
579  int rank[1];
580  hsize_t dims[2];
581  auto get_ndim = H5LTget_attribute_ndims(fileID, datasetName.c_str(), attributeName.c_str(), rank);
582  assert(get_ndim >= 0);
583  assert(rank[0]==1);
584 
585  H5T_class_t typeClass;
586  size_t typeSize;
587  auto status = H5LTget_attribute_info(fileID, datasetName.c_str(), attributeName.c_str(), dims, &typeClass, &typeSize);
588  assert(status >= 0);
589 
590  // create the vector to return
591  Eigen::Matrix<scalarType, Eigen::Dynamic, 1> data(dims[0]);
592 
593  // get the attribute
594 
595  auto get_att = H5LTget_attribute(fileID, datasetName.c_str(), attributeName.c_str(), HDF5_Type<scalarType>::GetFlag(), data.data());
596  assert(get_att >= 0);
597 
598  // translate it into and Eigen type
599  //Translater<std::vector<scalarType>, Eigen::Matrix<scalarType, Eigen::Dynamic, 1> > trans(data);
600  return data;
601  }
602 
604 
608  std::string GetStringAttribute(std::string const& datasetName, std::string const& attributeName) const;
609 
610 
612  std::vector<std::string> GetChildren(std::string base = "/") const;
613 
615  void FlushFile();
616 
618 
622  void MergeFile(std::shared_ptr<HDF5File> const& otherFile);
623 
625 
628  hid_t fileID = -1;
629 
631 
634  std::string filename = "";
635 
636  private:
637 
638  bool DoesFileExist(const std::string& name) const;
639 
640  };
641 
642 } // namespace Utilities
643 } // namespace muq
644 
645 #endif
A wrapper around HDF5 (and PHDF5) to handle file I/O.
Definition: HDF5File.h:23
Eigen::Matrix< scalarType, Eigen::Dynamic, 1 > GetVectorAttribute(std::string const &datasetName, std::string const &attributeName) const
Read a vector attribute from the HDF5 file.
Definition: HDF5File.h:569
void Open(std::string const &filename_)
Opens or creates the file.
Definition: HDF5File.cpp:30
HDF5File(std::string const &filename_)
Open or create the file.
Definition: HDF5File.cpp:8
bool DoesGroupExist(std::string const &name) const
Check to see if a group exists.
Definition: HDF5File.cpp:93
virtual ~HDF5File()
If HDF5File is destroyed, the file should be closed.
Definition: HDF5File.cpp:17
void Close()
Close the file.
Definition: HDF5File.cpp:58
hid_t fileID
The HDF5 file ID.
Definition: HDF5File.h:628
void CreateDataset(std::string const &name, int rows, int cols)
Create a new empty dataset in the file.
Definition: HDF5File.h:419
void FlushFile()
Flush any data in the HDF5 buffer to the file.
Definition: HDF5File.cpp:226
void CreateGroup(std::string const &name)
Create a new group in the file.
Definition: HDF5File.cpp:167
bool DoesFileExist(const std::string &name) const
Definition: HDF5File.cpp:25
void WriteStringAttribute(std::string const &datasetName, std::string const &attributeName, std::string const &attribute)
Write a string attribute to a dataset or group.
Definition: HDF5File.cpp:190
std::string GetStringAttribute(std::string const &datasetName, std::string const &attributeName) const
Read a string attribute from the HDF5 file.
Definition: HDF5File.cpp:205
void WriteMatrix(std::string name, Eigen::Matrix< scalarType, fixedRows, fixedCols > const &dataset)
Write data set to file.
Definition: HDF5File.h:70
bool IsDataSet(std::string const &name) const
Definition: HDF5File.cpp:285
void Copy(std::string const &destName, std::shared_ptr< HDF5File > srcFile, std::string const &srcName)
Copy the contents of one dataset into another.
Definition: HDF5File.cpp:76
bool IsGroup(std::string const &name) const
Definition: HDF5File.cpp:302
std::vector< std::string > GetChildren(std::string base="/") const
Get a list of immediate children of a group.
Definition: HDF5File.cpp:319
Eigen::VectorXi GetDataSetSize(std::string const name) const
Get the size of a dataset (rows,cols)
Definition: HDF5File.cpp:122
void MergeFile(std::shared_ptr< HDF5File > const &otherFile)
Merge another file into this file.
Definition: HDF5File.cpp:261
bool DoesDataSetExist(std::string const &name) const
Check to see if a data set exists.
Definition: HDF5File.cpp:110
void WritePartialMatrix(std::string const &name, Eigen::Matrix< scalarType, fixedRows, fixedCols > const &data, unsigned int const row, unsigned int const col)
Write part of a data set to file.
Definition: HDF5File.h:171
Eigen::Matrix< scalarType, fixedRows, fixedCols > ReadPartialMatrix(std::string const &name, int rowStart, int colStart, int numRows, int numCols) const
Definition: HDF5File.h:311
void WriteVectorAttribute(std::string const &datasetName, std::string const &attributeName, Eigen::Matrix< scalarType, fixedRows, 1 > const &attribute)
Add a vector attricute to a dataset or group.
Definition: HDF5File.h:491
Eigen::Matrix< scalarType, fixedRows, fixedCols > ReadMatrix(std::string const &name) const
Read a dataset from the file.
Definition: HDF5File.h:239
scalarType GetScalarAttribute(std::string const &datasetName, std::string const &attributeName) const
Read a scalar attribute from the HDF5 file.
Definition: HDF5File.h:552
void WriteScalarAttribute(std::string const &datasetName, std::string const &attributeName, attType const &attribute)
Add a scalar attribute to a dataset or group.
Definition: HDF5File.h:522
std::string filename
The name of the file.
Definition: HDF5File.h:634
herr_t H5LTset_attribute(hid_t loc_id, const char *obj_name, const char *attr_name, hid_t mem_type_id, void *buffer, size_t size)
Definition: HDF5Types.cpp:19
std::string GetParentPath(std::string const &base)
Definition: PathTools.cpp:6