Goby3 3.2.3
2025.05.13
Loading...
Searching...
No Matches
hdf5.h
Go to the documentation of this file.
1// Copyright 2016-2023:
2// GobySoft, LLC (2013-)
3// Community contributors (see AUTHORS file)
4// File authors:
5// Toby Schneider <toby@gobysoft.org>
6//
7//
8// This file is part of the Goby Underwater Autonomy Project Libraries
9// ("The Goby Libraries").
10//
11// The Goby Libraries are free software: you can redistribute them and/or modify
12// them under the terms of the GNU Lesser General Public License as published by
13// the Free Software Foundation, either version 2.1 of the License, or
14// (at your option) any later version.
15//
16// The Goby Libraries are distributed in the hope that they will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19// GNU Lesser General Public License for more details.
20//
21// You should have received a copy of the GNU Lesser General Public License
22// along with Goby. If not, see <http://www.gnu.org/licenses/>.
23
24#ifndef GOBY_MIDDLEWARE_LOG_HDF5_HDF5_H
25#define GOBY_MIDDLEWARE_LOG_HDF5_HDF5_H
26
27#include <algorithm> // for max
28#include <cstdint> // for uint64_t
29#include <deque> // for deque
30#include <map> // for map, multimap
31#include <memory> // for shared_ptr
32#include <string> // for string
33#include <utility> // for move
34#include <vector> // for vector
35
36#include <H5Cpp.h>
37#include <google/protobuf/descriptor.h> // for FieldDescriptor
38#include <google/protobuf/message.h> // for Message, Reflection
39
40#include "hdf5_predicate.h" // for predicate
41#include "hdf5_protobuf_values.h" // for PBMeta, retrieve_default_value
42
43namespace goby
44{
45namespace middleware
46{
47struct HDF5ProtobufEntry;
48
49namespace hdf5
50{
52{
53 MessageCollection(std::string n, std::string parent_group)
54 : name(std::move(n)), group(parent_group + "/" + name)
55 {
56 }
57 std::string name;
58 std::string group;
59
60 // time -> ProtobufEntry
61 std::multimap<std::uint64_t, HDF5ProtobufEntry> entries;
62};
63
64struct Channel
65{
66 Channel(std::string n) : name(std::move(n)), group("/" + name) {}
67 std::string name;
68 std::string group;
69
70 // returns number of messages in the collection
72
73 // message name -> hdf5::Message
74 std::map<std::string, MessageCollection> entries;
75};
76
77// keeps track of HDF5 groups for us, and creates them as needed
79{
80 public:
81 GroupFactory(H5::H5File& h5file) : root_group_(h5file.openGroup("/")) {}
82
83 // creates or opens group
84 H5::Group& fetch_group(const std::string& group_path);
85
86 private:
87 class GroupWrapper
88 {
89 public:
90 // for root group (already exists)
91 GroupWrapper(const H5::Group& group) : group_(group) {}
92
93 // for children groups
94 GroupWrapper(const std::string& name, H5::Group& parent) : group_(parent.createGroup(name))
95 {
96 }
97
98 H5::Group& fetch_group(std::deque<std::string>& nodes);
99
100 private:
101 H5::Group group_;
102 std::map<std::string, GroupWrapper> children_;
103 };
104 GroupWrapper root_group_;
105};
106
108{
109 public:
110 Writer(const std::string& output_file, bool write_zero_length_dim = true,
111 bool use_chunks = false, hsize_t chunk_length = 0, bool use_compression = false,
112 int compression_level = 0);
113
115
116 void write(bool final_write = true);
117
118 private:
119 void write_channel(const goby::middleware::hdf5::Channel& channel);
120 void write_channel_chunk_and_clear(goby::middleware::hdf5::Channel& channel);
121 void
122 write_message_collection(const goby::middleware::hdf5::MessageCollection& message_collection);
123 void write_time(const std::string& group,
124 const goby::middleware::hdf5::MessageCollection& message_collection);
125 void write_scheme(const std::string& group,
126 const goby::middleware::hdf5::MessageCollection& message_collection);
127
128 void write_field_selector(const std::string& group,
129 const google::protobuf::FieldDescriptor* field_desc,
130 const std::vector<const google::protobuf::Message*>& messages,
131 std::vector<hsize_t>& hs);
132
133 void write_enum_attributes(const std::string& group,
134 const google::protobuf::FieldDescriptor* field_desc);
135
136 template <typename T>
137 void write_field(const std::string& group, const google::protobuf::FieldDescriptor* field_desc,
138 const std::vector<const google::protobuf::Message*>& messages,
139 std::vector<hsize_t>& hs);
140
141 void write_embedded_message(const std::string& group,
142 const google::protobuf::FieldDescriptor* field_desc,
143 const std::vector<const google::protobuf::Message*> messages,
144 std::vector<hsize_t>& hs);
145
146 template <typename T>
147 void write_vector(const std::string& group, const std::string dataset_name,
148 const std::vector<T>& data, const std::vector<hsize_t>& hs,
149 const T& default_value, T empty_value = retrieve_empty_value<T>());
150
151 void write_vector(const std::string& group, const std::string& dataset_name,
152 const std::vector<std::string>& data, const std::vector<hsize_t>& hs,
153 const std::string& default_value);
154
155 std::string dim_str(const std::vector<hsize_t>& hs)
156 {
157 std::string d;
158 for (const auto& h : hs) d += std::to_string(h) + ",";
159 d.pop_back();
160 return d;
161 }
162
163 private:
164 // channel name -> hdf5::Channel
165 std::map<std::string, goby::middleware::hdf5::Channel> channels_;
166 H5::H5File h5file_;
168 bool write_zero_length_dim_;
169 bool use_chunks_;
170 hsize_t chunk_length_;
171 bool use_compression_;
172 int compression_level_;
173 bool final_write_;
174};
175
176template <typename T>
177void Writer::write_field(const std::string& group,
178 const google::protobuf::FieldDescriptor* field_desc,
179 const std::vector<const google::protobuf::Message*>& messages,
180 std::vector<hsize_t>& hs)
181{
182 if (field_desc->is_repeated())
183 {
184 // pass one to figure out field size
185 int max_field_size = 0;
186 for (auto message : messages)
187 {
188 if (message)
189 {
190 const google::protobuf::Reflection* refl = message->GetReflection();
191 int field_size = refl->FieldSize(*message, field_desc);
192 if (field_size > max_field_size)
193 max_field_size = field_size;
194 }
195 }
196
197 hs.push_back(max_field_size);
198
199 std::vector<T> values(messages.size() * max_field_size, retrieve_empty_value<T>());
200
201 for (unsigned i = 0, n = messages.size(); i < n; ++i)
202 {
203 if (messages[i])
204 {
205 const google::protobuf::Reflection* refl = messages[i]->GetReflection();
206 int field_size = refl->FieldSize(*messages[i], field_desc);
207 for (int j = 0; j < field_size; ++j)
208 retrieve_repeated_value<T>(&values[i * max_field_size + j], j,
209 PBMeta(refl, field_desc, (*messages[i])));
210 }
211 }
212
213 T default_value;
214 retrieve_default_value(&default_value, field_desc);
215
216 write_vector(group, field_desc->name(), values, hs, default_value);
217
218 hs.pop_back();
219 }
220 else
221 {
222 std::vector<T> values(messages.size(), retrieve_empty_value<T>());
223 for (unsigned i = 0, n = messages.size(); i < n; ++i)
224 {
225 if (messages[i])
226 {
227 const google::protobuf::Reflection* refl = messages[i]->GetReflection();
228 retrieve_single_value<T>(&values[i], PBMeta(refl, field_desc, (*messages[i])));
229 }
230 }
231
232 T default_value;
233 retrieve_default_value(&default_value, field_desc);
234
235 write_vector(group, field_desc->name(), values, hs, default_value);
236 }
237}
238
239template <typename T>
240void Writer::write_vector(const std::string& group, const std::string dataset_name,
241 const std::vector<T>& data, const std::vector<hsize_t>& hs,
242 const T& default_value, T empty_value)
243{
244 std::unique_ptr<H5::DataSpace> dataspace;
245 H5::Group& grp = group_factory_.fetch_group(group);
246
247 auto maxhs = hs;
248 H5::DSetCreatPropList prop;
249 bool ds_exists = grp.exists(dataset_name);
250 if (!ds_exists)
251 {
252 if (use_chunks_ && !final_write_)
253 {
254 // all dimensions may change
255 for (auto& m : maxhs) m = H5S_UNLIMITED;
256 auto chunkhs = hs;
257 chunkhs.front() = chunk_length_;
258
259 // set all chunk dimensions to at least 1
260 for (auto& s : chunkhs)
261 {
262 if (s == 0)
263 s = 1;
264 }
265
266 glog.is_debug2() && glog << "Setting chunks to " << dim_str(chunkhs) << std::endl;
267 prop.setChunk(chunkhs.size(), chunkhs.data());
268 prop.setFillValue(predicate<T>(), &empty_value);
269 if (use_compression_)
270 prop.setDeflate(compression_level_);
271 }
272
273 }
274
275 if (data.size() || write_zero_length_dim_)
276 dataspace = std::make_unique<H5::DataSpace>(hs.size(), hs.data(), maxhs.data());
277 else
278 dataspace = std::make_unique<H5::DataSpace>(H5S_NULL);
279
280 H5::DataSet dataset = ds_exists
281 ? grp.openDataSet(dataset_name)
282 : grp.createDataSet(dataset_name, predicate<T>(), *dataspace, prop);
283
284 if (ds_exists)
285 {
286 H5::DataSpace existing_space(dataset.getSpace());
287 std::vector<hsize_t> existing_hs(existing_space.getSimpleExtentNdims());
288 existing_space.getSimpleExtentDims(&existing_hs[0]);
289 glog.is_debug2() && glog << "Existing dimensions are: " << dim_str(existing_hs)
290 << std::endl;
291
292 // new size is larger of the existing or new vectors, except for the last dimension which is the sum
293 std::vector<hsize_t> new_size(hs.size(), 0);
294 for (int i = 0, n = new_size.size(); i < n; ++i)
295 new_size[i] = std::max(hs[i], existing_hs[i]);
296 new_size.front() = existing_hs.front() + hs.front();
297
298 glog.is_debug2() && glog << "Extending dimensions to: " << dim_str(new_size) << std::endl;
299 dataset.extend(new_size.data());
300
301 auto& memspace = dataspace;
302 H5::DataSpace filespace(dataset.getSpace());
303 std::vector<hsize_t> offset(hs.size(), 0);
304 offset.front() = existing_hs.front();
305
306 glog.is_debug2() && glog << "Selecting offset of: " << dim_str(offset) << std::endl;
307
308 filespace.selectHyperslab(H5S_SELECT_SET, hs.data(), offset.data());
309 if (data.size())
310 dataset.write(&data[0], predicate<T>(), *memspace, filespace);
311 }
312 else
313 {
314 if (data.size())
315 dataset.write(&data[0], predicate<T>());
316 }
317
318 const char* default_value_attr_name = "default_value";
319 if (!dataset.attrExists(default_value_attr_name))
320 {
321 const int rank = 1;
322 hsize_t att_hs[] = {1};
323 H5::DataSpace att_space(rank, att_hs, att_hs);
324 H5::Attribute att =
325 dataset.createAttribute(default_value_attr_name, predicate<T>(), att_space);
326 att.write(predicate<T>(), &default_value);
327 }
328}
329} // namespace hdf5
330} // namespace middleware
331} // namespace goby
332
333#endif
H5::Group & fetch_group(const std::string &group_path)
GroupFactory(H5::H5File &h5file)
Definition hdf5.h:81
Writer(const std::string &output_file, bool write_zero_length_dim=true, bool use_chunks=false, hsize_t chunk_length=0, bool use_compression=false, int compression_level=0)
void add_entry(goby::middleware::HDF5ProtobufEntry entry)
void write(bool final_write=true)
int FieldSize(const Message &message, const FieldDescriptor *field) const
goby::util::logger::GroupSetter group(std::string n)
constexpr goby::middleware::Group att("goby::middleware::groups::gpsd::att")
void retrieve_default_value(T *val, const google::protobuf::FieldDescriptor *field_desc)
std::string to_string(goby::middleware::protobuf::Layer layer)
Definition common.h:44
The global namespace for the Goby project.
util::FlexOstream glog
Access the Goby logger through this object.
STL namespace.
Represents an entry in a HDF5 scientific data file converted from a Google Protocol Buffers message.
Definition hdf5_plugin.h:42
std::map< std::string, MessageCollection > entries
Definition hdf5.h:74
size_t add_message(const goby::middleware::HDF5ProtobufEntry &entry)
Channel(std::string n)
Definition hdf5.h:66
MessageCollection(std::string n, std::string parent_group)
Definition hdf5.h:53
std::multimap< std::uint64_t, HDF5ProtobufEntry > entries
Definition hdf5.h:61