MUQ  0.4.3
HDF5File.cpp
Go to the documentation of this file.
2 
3 #include<iostream>
4 #include<fstream>
5 
6 using namespace muq::Utilities;
7 
8 HDF5File::HDF5File(std::string const& filename_){
9 
10  // make sure the file is not open
11  assert(fileID<0);
12 
13  // create (or open) the file
14  Open(filename_);
15 }
16 
18  // close the file
19  Close();
20 
21  // make sure the file is closed
22  assert(fileID<=0);
23 }
24 
25 bool HDF5File::DoesFileExist(const std::string& name) const {
26  std::ifstream f(name.c_str());
27  return f.good();
28 }
29 
30 void HDF5File::Open(std::string const& filename_) {
31 
32  if( fileID>=0 ) { // if a file is already open ...
33  // ... close it.
34  Close();
35  }
36 
37  // save the file name;
38  filename = filename_;
39 
40  // Set up file access property list with parallel I/O access
41  hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
42 
43  if( DoesFileExist(filename) ){ // if the file exists ...
44  // ... open it.
45  fileID = H5Fopen(filename.c_str(), H5F_ACC_RDWR, plist_id);
46  } else { // if the file does not exist ...
47  // ... create it.
48  fileID = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
49  }
50 
51  // close the property list
52  H5Pclose(plist_id);
53 
54  // make sure the file is open
55  assert(fileID>=0);
56 }
57 
59 
60  if( fileID<0 ) { // if the file is already closed ...
61  // ... do nothing
62  return;
63  }
64 
65  // flush the file
66  FlushFile();
67 
68  // close the file
69  H5Fclose(fileID);
70 
71  // set the file ID to something invalid
72  fileID = -1;
73  filename = "";
74 }
75 
76 void HDF5File::Copy(std::string const& dstName, std::shared_ptr<HDF5File> srcFile, std::string const& srcName)
77 {
78 
79  // make sure both files are open
80  assert(fileID>0);
81  assert(srcFile->fileID>0);
82 
83  herr_t err;
84  err = H5Ocopy(srcFile->fileID, srcName.c_str(), fileID, dstName.c_str(), H5P_DEFAULT, H5P_DEFAULT);
85 
86  if(err<0)
87  {
88  std::cerr << "WARNING: HDF5 could not copy " << srcName << " to " << dstName << std::endl;
89  };
90 
91 }
92 
93 bool HDF5File::DoesGroupExist(std::string const& name) const {
94 
95  // if the group is the root, return true
96  if( (name.compare("/")==0) || (name.compare("")==0) || (name.compare("/.")==0) ) {
97  return true;
98  }
99 
100  // make sure the file is open
101  assert(fileID>0);
102 
103  // get the group path and the path to it's parent
104  std::string parentPath = GetParentPath(name);
105 
106  // recursivly check if the parent exists and make sure the current group exists
107  return DoesGroupExist(parentPath) && (H5Lexists(fileID, name.c_str(), H5P_DEFAULT)>0);
108 }
109 
110 bool HDF5File::DoesDataSetExist(std::string const& name) const {
111 
112  // make sure the file is open
113  assert(fileID>0);
114 
115  // get the group path and the path to it's parent
116  std::string parentPath = GetParentPath(name);
117 
118  // recursivly check if the parent group exists and make sure the current data set exists
119  return DoesGroupExist(parentPath) && (H5Lexists(fileID, name.c_str(), H5P_DEFAULT) > 0);
120 }
121 
122 Eigen::VectorXi HDF5File::GetDataSetSize(std::string const name) const {
123 
124  // make sure the file is open
125  assert(fileID>0);
126 
127  if( !DoesDataSetExist(name) ) { // if the data set does not exist ...
128  // return an empty vector.
129  return Eigen::VectorXi();
130  }
131 
132  // make sure the file is open
133  assert(fileID>0);
134 
135  // open the data
136  hid_t dataset = H5Dopen2(fileID, name.c_str(), H5P_DEFAULT);
137 
138  // get the id for the dataspace of the dataset
139  hid_t space_id = H5Dget_space(dataset);
140 
141  // get the dimensionality of the dataspace
142  int rank = H5Sget_simple_extent_ndims(space_id);
143 
144  // get the dataspace dimension size and the max. size
145  hsize_t* dims = (hsize_t*)malloc(rank*sizeof(hsize_t));
146  hsize_t* max_dims = (hsize_t*)malloc(rank*sizeof(hsize_t));
147  H5Sget_simple_extent_dims(space_id, dims, max_dims);
148 
149  // close the dataspace and the dataset
150  H5Sclose(space_id);
151  H5Dclose(dataset);
152 
153  // convert the dimensionality into an Eigen::VectorXi
154  Eigen::VectorXi output(rank);
155  for( int i=0; i<rank; ++i ) {
156  output(i) = dims[i];
157  }
158 
159  // free the memory
160  free(dims);
161  free(max_dims);
162 
163  // return the dimensionality
164  return output;
165 }
166 
167 void HDF5File::CreateGroup(std::string const& name) {
168 
169  // make sure the file is open
170  assert(fileID>0);
171 
172  if( (DoesGroupExist(name))||(name.compare("")==0)||(name.compare("/")==0) ) { return; }
173 
174  // get the group path and the path to it's parent
175  std::string parentPath = GetParentPath(name);
176 
177  // make sure the parent exists by recursively creating it
178  if(!DoesGroupExist(parentPath))
179  CreateGroup(parentPath);
180 
181  // create the group
182  hid_t newgroup = H5Gcreate2(fileID, name.c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
183 
184  // close the group
185  H5Gclose(newgroup);
186 
187  FlushFile();
188 }
189 
190 void HDF5File::WriteStringAttribute(std::string const& datasetName,
191  std::string const& attributeName,
192  std::string const& attribute)
193 {
194  // make sure the file is open
195  assert(fileID>0);
196 
197  // Create the group or dataset in necessary
198  if( !DoesDataSetExist(datasetName) || !DoesGroupExist(datasetName) )
199  CreateGroup(datasetName);
200 
201  // write the attribute
202  H5LTset_attribute_string(fileID, datasetName.c_str(), attributeName.c_str(), attribute.c_str());
203 }
204 
205 std::string HDF5File::GetStringAttribute(std::string const& datasetName, std::string const& attributeName) const {
206  /*#if MUQ_MPI==1
207  std::unique_ptr<mpi::communicator> worldComm(new mpi::communicator);
208 
209  assert(worldComm->rank()==write);
210  #endif*/
211 
212  // make sure the file is open
213  assert(fileID>0);
214 
215  // make sure the dataset exists
216  assert(DoesDataSetExist(datasetName) || DoesGroupExist(datasetName));
217 
218  // get the string attribute
219  char tempStr[256];
220  H5LTget_attribute_string(fileID, datasetName.c_str(), attributeName.c_str(), tempStr);
221 
222  // return it as a strng
223  return std::string(tempStr);
224 }
225 
227  if( fileID>0 ) { // if the file is open ...
228  // flush it.
229  H5Fflush(fileID, H5F_SCOPE_GLOBAL);
230  }
231 }
232 
233 struct DataFileInfo {
234  DataFileInfo(std::shared_ptr<HDF5File> const& hdf5file) : hdf5file(hdf5file) {}
235 
236  const std::shared_ptr<HDF5File> hdf5file;
237 };
238 
239 herr_t CopyObjectToGlobalFile(hid_t o_id, const char *name, const H5O_info_t *info, void *op_data) {
240  std::string nameBuffer(name);
241  std::string fullGroupName = "/" + nameBuffer;
242 
243  // get the file we are copying into
244  DataFileInfo* fileInfo = static_cast<DataFileInfo*>(op_data);
245 
246  if( info->type==H5O_TYPE_DATASET ) { // data sets
247  if( !fileInfo->hdf5file->DoesDataSetExist(fullGroupName) ) { // if the data set does not exist ...
248  // ... copy it over
249  H5Ocopy(o_id, name, fileInfo->hdf5file->fileID, fullGroupName.c_str(), H5P_DEFAULT, H5P_DEFAULT);
250  }
251  } else if( info->type == H5O_TYPE_GROUP ) { // groups
252  if( !fileInfo->hdf5file->DoesGroupExist(fullGroupName) ) { // if the group does not exist ...
253  // ... copy it over.
254  H5Ocopy(o_id, name, fileInfo->hdf5file->fileID, fullGroupName.c_str(), H5P_DEFAULT, H5P_DEFAULT);
255  }
256  }
257 
258  return 0;
259 }
260 
261 void HDF5File::MergeFile(std::shared_ptr<HDF5File> const& otherFile) {
262 
263  // make sure the other file is open
264  assert(otherFile->fileID>0);
265 
266  // make sure this file is open
267  assert(fileID>0);
268 
269  // open the root group in the other file
270  const std::string rootGroupName = "/";
271  const hid_t otherRootGroup = H5Gopen2(otherFile->fileID, rootGroupName.c_str(), H5P_DEFAULT);
272 
273  auto dataInfo = std::make_shared<DataFileInfo>(shared_from_this());
274 
275  // copy the file
276  const herr_t status = H5Ovisit(otherRootGroup, H5_INDEX_NAME, H5_ITER_NATIVE, &CopyObjectToGlobalFile, static_cast<void*>(dataInfo.get()));
277 
278  assert(status >= 0);
279 
280  // close the other file's root group
281  H5Gclose(otherRootGroup);
282 }
283 
284 
285 bool HDF5File::IsDataSet(std::string const& name) const
286 {
287 
288  if(!DoesDataSetExist(name))
289  return false;
290 
291  herr_t status;
292  H5O_info_t info;
293 
294  status = H5Oget_info_by_name(fileID, name.c_str(), &info, H5P_DEFAULT);
295 
296  if(status<0)
297  return false;
298 
299  return info.type == H5O_TYPE_DATASET;
300 }
301 
302 bool HDF5File::IsGroup(std::string const& name) const
303 {
304 
305  if(!DoesGroupExist(name))
306  return false;
307 
308  herr_t status;
309  H5O_info_t info;
310 
311  status = H5Oget_info_by_name(fileID, name.c_str(), &info, H5P_DEFAULT);
312 
313  if(status<0)
314  return false;
315 
316  return info.type == H5O_TYPE_GROUP;
317 }
318 
319 std::vector<std::string> HDF5File::GetChildren(std::string base) const
320 {
321  // Make sure the HDF5 file is open
322  assert(fileID>0);
323 
324  if(IsDataSet(base))
325  return std::vector<std::string>();
326 
327  // Make sure the group exists
328  assert(DoesGroupExist(base));
329 
330  // open the group
331  hid_t gid = H5Gopen2(fileID, base.c_str(), H5P_DEFAULT);
332 
333  char name[1024];
334  ssize_t len;
335  hsize_t nobj;
336 
337  // get the number of objects in this group
338  herr_t status = H5Gget_num_objs(gid, &nobj);
339 
340  // Intialize the vector of strings
341  std::vector<std::string> output(nobj);
342 
343  // Fill in the output vector
344  for(int i = 0; i < nobj; i++)
345  {
346  len = H5Gget_objname_by_idx(gid, (hsize_t)i, name, (size_t)1024);
347  output.at(i) = std::string(name,name + len);
348  }
349 
350  return output;
351 
352 };
herr_t CopyObjectToGlobalFile(hid_t o_id, const char *name, const H5O_info_t *info, void *op_data)
Definition: HDF5File.cpp:239
void Open(std::string const &filename_)
Opens or creates the file.
Definition: HDF5File.cpp:30
HDF5File(std::string const &filename_)
Open or create the file.
Definition: HDF5File.cpp:8
bool DoesGroupExist(std::string const &name) const
Check to see if a group exists.
Definition: HDF5File.cpp:93
virtual ~HDF5File()
If HDF5File is destroyed, the file should be closed.
Definition: HDF5File.cpp:17
void Close()
Close the file.
Definition: HDF5File.cpp:58
hid_t fileID
The HDF5 file ID.
Definition: HDF5File.h:628
void FlushFile()
Flush any data in the HDF5 buffer to the file.
Definition: HDF5File.cpp:226
void CreateGroup(std::string const &name)
Create a new group in the file.
Definition: HDF5File.cpp:167
bool DoesFileExist(const std::string &name) const
Definition: HDF5File.cpp:25
void WriteStringAttribute(std::string const &datasetName, std::string const &attributeName, std::string const &attribute)
Write a string attribute to a dataset or group.
Definition: HDF5File.cpp:190
std::string GetStringAttribute(std::string const &datasetName, std::string const &attributeName) const
Read a string attribute from the HDF5 file.
Definition: HDF5File.cpp:205
bool IsDataSet(std::string const &name) const
Definition: HDF5File.cpp:285
void Copy(std::string const &destName, std::shared_ptr< HDF5File > srcFile, std::string const &srcName)
Copy the contents of one dataset into another.
Definition: HDF5File.cpp:76
bool IsGroup(std::string const &name) const
Definition: HDF5File.cpp:302
std::vector< std::string > GetChildren(std::string base="/") const
Get a list of immediate children of a group.
Definition: HDF5File.cpp:319
Eigen::VectorXi GetDataSetSize(std::string const name) const
Get the size of a dataset (rows,cols)
Definition: HDF5File.cpp:122
void MergeFile(std::shared_ptr< HDF5File > const &otherFile)
Merge another file into this file.
Definition: HDF5File.cpp:261
bool DoesDataSetExist(std::string const &name) const
Check to see if a data set exists.
Definition: HDF5File.cpp:110
std::string filename
The name of the file.
Definition: HDF5File.h:634
__int64 ssize_t
Definition: httplib.h:113
std::string GetParentPath(std::string const &base)
Definition: PathTools.cpp:6