Goby3  3.1.5a
2024.05.23
hdf5.h
Go to the documentation of this file.
1 // Copyright 2016-2023:
2 // GobySoft, LLC (2013-)
3 // Community contributors (see AUTHORS file)
4 // File authors:
5 // Toby Schneider <toby@gobysoft.org>
6 //
7 //
8 // This file is part of the Goby Underwater Autonomy Project Libraries
9 // ("The Goby Libraries").
10 //
11 // The Goby Libraries are free software: you can redistribute them and/or modify
12 // them under the terms of the GNU Lesser General Public License as published by
13 // the Free Software Foundation, either version 2.1 of the License, or
14 // (at your option) any later version.
15 //
16 // The Goby Libraries are distributed in the hope that they will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 // GNU Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public License
22 // along with Goby. If not, see <http://www.gnu.org/licenses/>.
23 
24 #ifndef GOBY_MIDDLEWARE_LOG_HDF5_HDF5_H
25 #define GOBY_MIDDLEWARE_LOG_HDF5_HDF5_H
26 
27 #include <algorithm> // for max
28 #include <cstdint> // for uint64_t
29 #include <deque> // for deque
30 #include <map> // for map, multimap
31 #include <memory> // for shared_ptr
32 #include <string> // for string
33 #include <utility> // for move
34 #include <vector> // for vector
35 
36 #include <H5Cpp.h>
37 #include <google/protobuf/descriptor.h> // for FieldDescriptor
38 #include <google/protobuf/message.h> // for Message, Reflection
39 
40 #include "hdf5_predicate.h" // for predicate
41 #include "hdf5_protobuf_values.h" // for PBMeta, retrieve_default_value
42 
43 namespace goby
44 {
45 namespace middleware
46 {
47 struct HDF5ProtobufEntry;
48 
49 namespace hdf5
50 {
52 {
53  MessageCollection(std::string n, std::string parent_group)
54  : name(std::move(n)), group(parent_group + "/" + name)
55  {
56  }
57  std::string name;
58  std::string group;
59 
60  // time -> ProtobufEntry
61  std::multimap<std::uint64_t, HDF5ProtobufEntry> entries;
62 };
63 
64 struct Channel
65 {
66  Channel(std::string n) : name(std::move(n)), group("/" + name) {}
67  std::string name;
68  std::string group;
69 
70  // returns number of messages in the collection
72 
73  // message name -> hdf5::Message
74  std::map<std::string, MessageCollection> entries;
75 };
76 
77 // keeps track of HDF5 groups for us, and creates them as needed
79 {
80  public:
81  GroupFactory(H5::H5File& h5file) : root_group_(h5file.openGroup("/")) {}
82 
83  // creates or opens group
84  H5::Group& fetch_group(const std::string& group_path);
85 
86  private:
87  class GroupWrapper
88  {
89  public:
90  // for root group (already exists)
91  GroupWrapper(const H5::Group& group) : group_(group) {}
92 
93  // for children groups
94  GroupWrapper(const std::string& name, H5::Group& parent) : group_(parent.createGroup(name))
95  {
96  }
97 
98  H5::Group& fetch_group(std::deque<std::string>& nodes);
99 
100  private:
101  H5::Group group_;
102  std::map<std::string, GroupWrapper> children_;
103  };
104  GroupWrapper root_group_;
105 };
106 
107 class Writer
108 {
109  public:
110  Writer(const std::string& output_file, bool write_zero_length_dim = true,
111  bool use_chunks = false, hsize_t chunk_length = 0, bool use_compression = false,
112  int compression_level = 0);
113 
115 
116  void write(bool final_write = true);
117 
118  private:
119  void write_channel(const goby::middleware::hdf5::Channel& channel);
120  void write_channel_chunk_and_clear(goby::middleware::hdf5::Channel& channel);
121  void
122  write_message_collection(const goby::middleware::hdf5::MessageCollection& message_collection);
123  void write_time(const std::string& group,
124  const goby::middleware::hdf5::MessageCollection& message_collection);
125  void write_scheme(const std::string& group,
126  const goby::middleware::hdf5::MessageCollection& message_collection);
127 
128  void write_field_selector(const std::string& group,
129  const google::protobuf::FieldDescriptor* field_desc,
130  const std::vector<const google::protobuf::Message*>& messages,
131  std::vector<hsize_t>& hs);
132 
133  void write_enum_attributes(const std::string& group,
134  const google::protobuf::FieldDescriptor* field_desc);
135 
136  template <typename T>
137  void write_field(const std::string& group, const google::protobuf::FieldDescriptor* field_desc,
138  const std::vector<const google::protobuf::Message*>& messages,
139  std::vector<hsize_t>& hs);
140 
141  void write_embedded_message(const std::string& group,
142  const google::protobuf::FieldDescriptor* field_desc,
143  const std::vector<const google::protobuf::Message*> messages,
144  std::vector<hsize_t>& hs);
145 
146  template <typename T>
147  void write_vector(const std::string& group, const std::string dataset_name,
148  const std::vector<T>& data, const std::vector<hsize_t>& hs,
149  const T& default_value, T empty_value = retrieve_empty_value<T>());
150 
151  void write_vector(const std::string& group, const std::string& dataset_name,
152  const std::vector<std::string>& data, const std::vector<hsize_t>& hs,
153  const std::string& default_value);
154 
155  std::string dim_str(const std::vector<hsize_t>& hs)
156  {
157  std::string d;
158  for (const auto& h : hs) d += std::to_string(h) + ",";
159  d.pop_back();
160  return d;
161  }
162 
163  private:
164  // channel name -> hdf5::Channel
165  std::map<std::string, goby::middleware::hdf5::Channel> channels_;
166  H5::H5File h5file_;
168  bool write_zero_length_dim_;
169  bool use_chunks_;
170  hsize_t chunk_length_;
171  bool use_compression_;
172  int compression_level_;
173  bool final_write_;
174 };
175 
176 template <typename T>
177 void Writer::write_field(const std::string& group,
178  const google::protobuf::FieldDescriptor* field_desc,
179  const std::vector<const google::protobuf::Message*>& messages,
180  std::vector<hsize_t>& hs)
181 {
182  if (field_desc->is_repeated())
183  {
184  // pass one to figure out field size
185  int max_field_size = 0;
186  for (auto message : messages)
187  {
188  if (message)
189  {
190  const google::protobuf::Reflection* refl = message->GetReflection();
191  int field_size = refl->FieldSize(*message, field_desc);
192  if (field_size > max_field_size)
193  max_field_size = field_size;
194  }
195  }
196 
197  hs.push_back(max_field_size);
198 
199  std::vector<T> values(messages.size() * max_field_size, retrieve_empty_value<T>());
200 
201  for (unsigned i = 0, n = messages.size(); i < n; ++i)
202  {
203  if (messages[i])
204  {
205  const google::protobuf::Reflection* refl = messages[i]->GetReflection();
206  int field_size = refl->FieldSize(*messages[i], field_desc);
207  for (int j = 0; j < field_size; ++j)
208  retrieve_repeated_value<T>(&values[i * max_field_size + j], j,
209  PBMeta(refl, field_desc, (*messages[i])));
210  }
211  }
212 
213  T default_value;
214  retrieve_default_value(&default_value, field_desc);
215 
216  write_vector(group, field_desc->name(), values, hs, default_value);
217 
218  hs.pop_back();
219  }
220  else
221  {
222  std::vector<T> values(messages.size(), retrieve_empty_value<T>());
223  for (unsigned i = 0, n = messages.size(); i < n; ++i)
224  {
225  if (messages[i])
226  {
227  const google::protobuf::Reflection* refl = messages[i]->GetReflection();
228  retrieve_single_value<T>(&values[i], PBMeta(refl, field_desc, (*messages[i])));
229  }
230  }
231 
232  T default_value;
233  retrieve_default_value(&default_value, field_desc);
234 
235  write_vector(group, field_desc->name(), values, hs, default_value);
236  }
237 }
238 
239 template <typename T>
240 void Writer::write_vector(const std::string& group, const std::string dataset_name,
241  const std::vector<T>& data, const std::vector<hsize_t>& hs,
242  const T& default_value, T empty_value)
243 {
244  std::unique_ptr<H5::DataSpace> dataspace;
245  H5::Group& grp = group_factory_.fetch_group(group);
246 
247  auto maxhs = hs;
248  H5::DSetCreatPropList prop;
249  bool ds_exists = grp.exists(dataset_name);
250  if (!ds_exists)
251  {
252  if (use_chunks_ && !final_write_)
253  {
254  // all dimensions may change
255  for (auto& m : maxhs) m = H5S_UNLIMITED;
256  auto chunkhs = hs;
257  chunkhs.front() = chunk_length_;
258 
259  // set all chunk dimensions to at least 1
260  for (auto& s : chunkhs)
261  {
262  if (s == 0)
263  s = 1;
264  }
265 
266  glog.is_debug2() && glog << "Setting chunks to " << dim_str(chunkhs) << std::endl;
267  prop.setChunk(chunkhs.size(), chunkhs.data());
268  prop.setFillValue(predicate<T>(), &empty_value);
269  if (use_compression_)
270  prop.setDeflate(compression_level_);
271  }
272 
273  }
274 
275  if (data.size() || write_zero_length_dim_)
276  dataspace = std::make_unique<H5::DataSpace>(hs.size(), hs.data(), maxhs.data());
277  else
278  dataspace = std::make_unique<H5::DataSpace>(H5S_NULL);
279 
280  H5::DataSet dataset = ds_exists
281  ? grp.openDataSet(dataset_name)
282  : grp.createDataSet(dataset_name, predicate<T>(), *dataspace, prop);
283 
284  if (ds_exists)
285  {
286  H5::DataSpace existing_space(dataset.getSpace());
287  std::vector<hsize_t> existing_hs(existing_space.getSimpleExtentNdims());
288  existing_space.getSimpleExtentDims(&existing_hs[0]);
289  glog.is_debug2() && glog << "Existing dimensions are: " << dim_str(existing_hs)
290  << std::endl;
291 
292  // new size is larger of the existing or new vectors, except for the last dimension which is the sum
293  std::vector<hsize_t> new_size(hs.size(), 0);
294  for (int i = 0, n = new_size.size(); i < n; ++i)
295  new_size[i] = std::max(hs[i], existing_hs[i]);
296  new_size.front() = existing_hs.front() + hs.front();
297 
298  glog.is_debug2() && glog << "Extending dimensions to: " << dim_str(new_size) << std::endl;
299  dataset.extend(new_size.data());
300 
301  auto& memspace = dataspace;
302  H5::DataSpace filespace(dataset.getSpace());
303  std::vector<hsize_t> offset(hs.size(), 0);
304  offset.front() = existing_hs.front();
305 
306  glog.is_debug2() && glog << "Selecting offset of: " << dim_str(offset) << std::endl;
307 
308  filespace.selectHyperslab(H5S_SELECT_SET, hs.data(), offset.data());
309  if (data.size())
310  dataset.write(&data[0], predicate<T>(), *memspace, filespace);
311  }
312  else
313  {
314  if (data.size())
315  dataset.write(&data[0], predicate<T>());
316  }
317 
318  const char* default_value_attr_name = "default_value";
319  if (!dataset.attrExists(default_value_attr_name))
320  {
321  const int rank = 1;
322  hsize_t att_hs[] = {1};
323  H5::DataSpace att_space(rank, att_hs, att_hs);
324  H5::Attribute att =
325  dataset.createAttribute(default_value_attr_name, predicate<T>(), att_space);
326  att.write(predicate<T>(), &default_value);
327  }
328 }
329 } // namespace hdf5
330 } // namespace middleware
331 } // namespace goby
332 
333 #endif
goby::middleware::hdf5::MessageCollection
Definition: hdf5.h:51
goby::middleware::hdf5::MessageCollection::group
std::string group
Definition: hdf5.h:58
hdf5_protobuf_values.h
goby::middleware::hdf5::Writer
Definition: hdf5.h:107
goby
The global namespace for the Goby project.
Definition: acomms_constants.h:33
goby::middleware::hdf5::MessageCollection::MessageCollection
MessageCollection(std::string n, std::string parent_group)
Definition: hdf5.h:53
goby::middleware::hdf5::Channel::group
std::string group
Definition: hdf5.h:68
group
goby::util::logger::GroupSetter group(std::string n)
Definition: logger_manipulators.h:134
google::protobuf::Reflection
Definition: message.h:412
goby::middleware::hdf5::retrieve_default_value
void retrieve_default_value(T *val, const google::protobuf::FieldDescriptor *field_desc)
goby::util::FlexOstream::is_debug2
bool is_debug2()
Definition: flex_ostream.h:85
google::protobuf::Reflection::FieldSize
virtual int FieldSize(const Message &message, const FieldDescriptor *field) const =0
goby::middleware::groups::gpsd::att
constexpr goby::middleware::Group att("goby::middleware::groups::gpsd::att")
goby::middleware::hdf5::Channel::entries
std::map< std::string, MessageCollection > entries
Definition: hdf5.h:74
goby::middleware::hdf5::Writer::add_entry
void add_entry(goby::middleware::HDF5ProtobufEntry entry)
goby::middleware::hdf5::Channel::Channel
Channel(std::string n)
Definition: hdf5.h:66
message.h
goby::middleware::hdf5::GroupFactory
Definition: hdf5.h:78
to_string
NLOHMANN_BASIC_JSON_TPL_DECLARATION std::string to_string(const NLOHMANN_BASIC_JSON_TPL &j)
user-defined to_string function for JSON values
Definition: json.hpp:24301
goby::middleware::hdf5::MessageCollection::entries
std::multimap< std::uint64_t, HDF5ProtobufEntry > entries
Definition: hdf5.h:61
goby::middleware::hdf5::Channel
Definition: hdf5.h:64
goby::middleware::hdf5::Writer::Writer
Writer(const std::string &output_file, bool write_zero_length_dim=true, bool use_chunks=false, hsize_t chunk_length=0, bool use_compression=false, int compression_level=0)
goby::middleware::hdf5::MessageCollection::name
std::string name
Definition: hdf5.h:57
goby::middleware::hdf5::GroupFactory::fetch_group
H5::Group & fetch_group(const std::string &group_path)
goby::middleware::hdf5::Channel::name
std::string name
Definition: hdf5.h:67
goby::middleware::hdf5::Channel::add_message
size_t add_message(const goby::middleware::HDF5ProtobufEntry &entry)
goby::glog
util::FlexOstream glog
Access the Goby logger through this object.
goby::middleware::HDF5ProtobufEntry
Represents an entry in a HDF5 scientific data file converted from a Google Protocol Buffers message.
Definition: hdf5_plugin.h:41
goby::middleware::hdf5::GroupFactory::GroupFactory
GroupFactory(H5::H5File &h5file)
Definition: hdf5.h:81
goby::middleware::hdf5::Writer::write
void write(bool final_write=true)
hdf5_predicate.h