TECA
teca_netcdf_util.h
1 #ifndef teca_netcdf_util_h
2 #define teca_netcdf_util_h
3 
4 #include "teca_config.h"
5 #include "teca_mpi.h"
6 #include "teca_metadata.h"
7 #include "teca_thread_pool.h"
8 
9 #include <mutex>
10 #include <string>
11 
12 #include <netcdf.h>
13 #if defined(TECA_HAS_NETCDF_MPI)
14 #include <netcdf_par.h>
15 #endif
16 
17 // macros to help with netcdf data types
// NC_DISPATCH_FP expands to a switch on the NetCDF type code tc_ that handles
// only NC_FLOAT and NC_DOUBLE. code_ is pasted into each case, where the alias
// NC_T names the matching C++ type (see NC_DISPATCH_CASE). any other type code
// is reported through TECA_ERROR.
18 #define NC_DISPATCH_FP(tc_, code_) \
19  switch (tc_) \
20  { \
21  NC_DISPATCH_CASE(NC_FLOAT, float, code_) \
22  NC_DISPATCH_CASE(NC_DOUBLE, double, code_) \
23  default: \
24  TECA_ERROR("netcdf type code " << tc_ \
25  << " is not a floating point type") \
26  }
27 
// NC_DISPATCH expands to a switch on the NetCDF type code tc_ covering the
// integer, character, and floating point type codes listed below. code_ is
// pasted into each case, where the alias NC_T names the C++ type paired with
// that type code (see NC_DISPATCH_CASE). any other type code is reported
// through TECA_ERROR. note that NC_BYTE and NC_CHAR both dispatch with
// NC_T = char.
28 #define NC_DISPATCH(tc_, code_) \
29  switch (tc_) \
30  { \
31  NC_DISPATCH_CASE(NC_BYTE, char, code_) \
32  NC_DISPATCH_CASE(NC_UBYTE, unsigned char, code_) \
33  NC_DISPATCH_CASE(NC_CHAR, char, code_) \
34  NC_DISPATCH_CASE(NC_SHORT, short int, code_) \
35  NC_DISPATCH_CASE(NC_USHORT, unsigned short int, code_) \
36  NC_DISPATCH_CASE(NC_INT, int, code_) \
37  NC_DISPATCH_CASE(NC_UINT, unsigned int, code_) \
38  NC_DISPATCH_CASE(NC_INT64, long long, code_) \
39  NC_DISPATCH_CASE(NC_UINT64, unsigned long long, code_) \
40  NC_DISPATCH_CASE(NC_FLOAT, float, code_) \
41  NC_DISPATCH_CASE(NC_DOUBLE, double, code_) \
42  default: \
43  TECA_ERROR("netcdf type code " << tc_ \
44  << " is not supported") \
45  }
46 
// NC_DISPATCH_CASE emits a single case of a dispatch switch. for the type
// code cc_ it opens a scope, declares NC_T as an alias of the C++ type tt_,
// pastes code_, and then breaks out of the switch.
47 #define NC_DISPATCH_CASE(cc_, tt_, code_) \
48  case cc_: \
49  { \
50  using NC_T = tt_; \
51  code_ \
52  break; \
53  }
54 
55 namespace teca_netcdf_util
56 {
57 
58 // traits classes mapping to/from netcdf
// netcdf_tt is keyed on a C++ type and cpp_tt is keyed on a NetCDF type code.
// the primary templates are empty, only the specializations generated by the
// DECLARE_NETCDF_TT and DECLARE_CPP_TT macros carry members.
59 template<typename num_t> class netcdf_tt {};
60 template<int nc_enum> class cpp_tt {};
61 
// DECLARE_NETCDF_TT specializes netcdf_tt for the C++ type cpp_t_. the
// specialization provides type_code, set to the NetCDF type code nc_c_, and
// name(), which returns the spelling of the nc_c_ argument as a string.
// NOTE(review): long and unsigned long are both mapped to NC_LONG, which
// netcdf.h presumably defines as a deprecated alias of the 32 bit NC_INT.
// confirm this is intended on platforms where long is 64 bits.
62 #define DECLARE_NETCDF_TT(cpp_t_, nc_c_) \
63 template <> class netcdf_tt<cpp_t_> \
64 { \
65 public: \
66  enum { type_code = nc_c_ }; \
67  static const char *name() { return #nc_c_; } \
68 };
69 DECLARE_NETCDF_TT(char, NC_BYTE)
70 DECLARE_NETCDF_TT(unsigned char, NC_UBYTE)
71 //DECLARE_NETCDF_TT(char, NC_CHAR)
72 DECLARE_NETCDF_TT(short int, NC_SHORT)
73 DECLARE_NETCDF_TT(unsigned short int, NC_USHORT)
74 DECLARE_NETCDF_TT(int, NC_INT)
75 DECLARE_NETCDF_TT(long, NC_LONG)
76 DECLARE_NETCDF_TT(unsigned long, NC_LONG)
77 DECLARE_NETCDF_TT(unsigned int, NC_UINT)
78 DECLARE_NETCDF_TT(long long, NC_INT64)
79 DECLARE_NETCDF_TT(unsigned long long, NC_UINT64)
80 DECLARE_NETCDF_TT(float, NC_FLOAT)
81 DECLARE_NETCDF_TT(double, NC_DOUBLE)
82 
// DECLARE_CPP_TT specializes cpp_tt for the NetCDF type code nc_c_. the
// specialization provides type, an alias of the C++ type cpp_t_, and name(),
// which returns the spelling of the cpp_t_ argument as a string.
// no specialization is declared for NC_CHAR or NC_LONG, those lines are
// commented out below.
83 #define DECLARE_CPP_TT(cpp_t_, nc_c_) \
84 template <> class cpp_tt<nc_c_> \
85 { \
86 public: \
87  using type = cpp_t_; \
88  static const char *name() { return #cpp_t_; } \
89 };
90 DECLARE_CPP_TT(char, NC_BYTE)
91 DECLARE_CPP_TT(unsigned char, NC_UBYTE)
92 //DECLARE_CPP_TT(char, NC_CHAR)
93 DECLARE_CPP_TT(short int, NC_SHORT)
94 DECLARE_CPP_TT(unsigned short int, NC_USHORT)
95 DECLARE_CPP_TT(int, NC_INT)
96 //DECLARE_CPP_TT(long, NC_LONG)
97 //DECLARE_CPP_TT(unsigned long, NC_LONG)
98 DECLARE_CPP_TT(unsigned int, NC_UINT)
99 DECLARE_CPP_TT(long long, NC_INT64)
100 DECLARE_CPP_TT(unsigned long long, NC_UINT64)
101 DECLARE_CPP_TT(float, NC_FLOAT)
102 DECLARE_CPP_TT(double, NC_DOUBLE)
103 
104 // to deal with fortran fixed length strings
105 // which are not properly null terminated
// NOTE(review): presumably s is the character buffer and n its length,
// confirm against the implementation.
106 void crtrim(char *s, long n);
107 
108 // NetCDF 3 is not threadsafe. The HDF5 C-API can be compiled to be threadsafe,
109 // but it is usually not. NetCDF uses HDF5-HL API to access HDF5, but HDF5-HL
110 // API is not threadsafe without the --enable-unsupported flag. For all those
111 // reasons it's best for the time being to protect all NetCDF I/O.
// returns a reference to a mutex. presumably callers are expected to hold it
// around NetCDF calls, confirm against the call sites.
112 std::mutex &get_netcdf_mutex();
113 
114 // RAII for managing netcdf files
116 {
117 public:
 // default construct with the handle set to 0, a value that
 // operator bool reports as not valid
118  netcdf_handle() : m_handle(0)
119  {}
120 
121  // initialize with a handle returned from
122  // nc_open/nc_create etc
123  netcdf_handle(int h) : m_handle(h)
124  {}
125 
126  // close the file during destruction
127  ~netcdf_handle()
128  { this->close(); }
129 
130  // this is a move only class, and should
131  // only be initialized with a valid handle
132  netcdf_handle(const netcdf_handle &) = delete;
133  void operator=(const netcdf_handle &) = delete;
134 
135  // move construction takes ownership
136  // from the other object, whose handle is reset to 0
138  {
139  m_handle = other.m_handle;
140  other.m_handle = 0;
141  }
142 
143  // move assignment calls close on this object, then takes ownership
144  // from the other object, whose handle is reset to 0
145  void operator=(netcdf_handle &&other)
146  {
147  this->close();
148  m_handle = other.m_handle;
149  other.m_handle = 0;
150  }
151 
152  // open the file. this can be used from MPI parallel runs, but collective
153  // I/O is not possible when a file is opened this way. Returns 0 on success.
154  int open(const std::string &file_path, int mode);
155 
156  // open the file. this can be used when collective I/O is desired. the
157  // passed in communicator specifies the subset of ranks that will access
158  // the file. Calling this when linked to a non-MPI enabled NetCDF install,
159  // from a parallel run, will result in an error. Returns 0 on success.
160  int open(MPI_Comm comm, const std::string &file_path, int mode);
161 
162  // create the file. this can be used from MPI parallel runs, but collective
163  // I/O is not possible when a file is created this way. Returns 0 on success.
164  int create(const std::string &file_path, int mode);
165 
166  // create the file. this can be used when collective I/O is desired. the
167  // passed in communicator specifies the subset of ranks that will access
168  // the file. Calling this when linked to a non-MPI enabled NetCDF install,
169  // from a parallel run, will result in an error. Returns 0 on success.
170  int create(MPI_Comm comm, const std::string &file_path, int mode);
171 
172  // close the file
173  int close();
174 
175  // flush all data to disk
176  int flush();
177 
178  // returns a non-const reference to the stored handle
179  int &get()
180  { return m_handle; }
181 
182  // test if the handle is valid, i.e. greater than zero
183  operator bool() const
184  { return m_handle > 0; }
185 
186 private:
187  int m_handle;
188 };
189 
190 // read the specified variable attribute by name.
191 // its value is stored in the metadata object
192 // return is non-zero if an error occurred
193 int read_attribute(netcdf_handle &fh, int var_id,
194  const std::string &att_name, teca_metadata &atts);
195 
196 // read the specified variable attribute by id
197 // its value is stored in the metadata object
198 // return is non-zero if an error occurred
199 int read_attribute(netcdf_handle &fh, int var_id,
200  int att_id, teca_metadata &atts);
201 
202 // read the specified variable's name, dimensions, and its associated
203 // NetCDF attributes into the metadata object. Additionally the following
204 // key/value pairs are added and useful for subsequent I/O and processing
205 //
206 // cf_id - the NetCDF variable id that can be used to read the variable
207 // cf_dims - a vector of the NetCDF dimension lengths (i.e. the variable's shape)
208 // cf_dim_names - a vector of the names of the NetCDF dimensions
209 // cf_type_code - the NetCDF type code
210 // type_code - the TECA type code
211 // centering - for now it is set to teca_array_attributes::point_centering
212 //
213 // return is non-zero if an error occurred
214 int read_variable_attributes(netcdf_handle &fh, int var_id,
215  std::string &name, teca_metadata &atts);
216 
// overload that identifies the variable by name rather than by id.
// NOTE(review): presumably it stores the same key/value pairs in atts and
// returns non-zero on error like the id based overload, confirm against
// the implementation.
217 int read_variable_attributes(netcdf_handle &fh,
218  const std::string &name, teca_metadata &atts);
219 
220 // function object that reads and returns a variable from the
221 // named file. we're doing this so we can do thread
222 // parallel I/O to hide some of the cost of opening files
223 // on Lustre and to hide the cost of reading time coordinate
224 // which is typically very expensive as NetCDF stores
225 // unlimited dimensions non-contiguously
226 //
227 // note: Thu 09 Apr 2020 05:45:29 AM PDT
228 // Threading these operations worked well in NetCDF 3, however
229 // in NetCDF 4 backed by HDF5 necessary locking eliminates any
230 // speed up.
232 {
233 public:
234  // data and task types. data_t pairs an unsigned long id with
 // an (array, metadata) pair
235  using data_elem_t = std::pair<p_teca_variant_array, teca_metadata>;
236  using data_t = std::pair<unsigned long, data_elem_t>;
237  using task_t = std::packaged_task<data_t()>;
239  using p_queue_t = std::shared_ptr<queue_t>;
240 
 // stores copies of the path, file, and variable strings and the id
241  read_variable_and_attributes(const std::string &path, const std::string &file,
242  unsigned long id, const std::string &variable) : m_path(path),
243  m_file(file), m_variable(variable), m_id(id)
244  {}
245 
 // bundles an id, an array, and metadata into a data_t. the array
 // defaults to nullptr and the metadata to a default constructed
 // teca_metadata
246  static
247  data_t package(unsigned long id,
248  p_teca_variant_array var = nullptr,
249  const teca_metadata &md = teca_metadata())
250  {
251  return std::make_pair(id, std::make_pair(var, md));
252  }
253 
 // NOTE(review): declared only. presumably this performs the read and
 // returns the packaged result, confirm against the implementation.
254  data_t operator()();
255 
256 private:
257  std::string m_path;
258  std::string m_file;
259  std::string m_variable;
260  unsigned long m_id;
261 };
262 
263 // function object that reads and returns a variable from the
264 // named file. we're doing this so we can do thread
265 // parallel I/O to hide some of the cost of opening files
266 // on Lustre and to hide the cost of reading time coordinate
267 // which is typically very expensive as NetCDF stores
268 // unlimited dimensions non-contiguously
269 //
270 // note: Thu 09 Apr 2020 05:45:29 AM PDT
271 // Threading these operations worked well in NetCDF 3, however
272 // in NetCDF 4 backed by HDF5 necessary locking eliminates any
273 // speed up.
275 {
276 public:
277  // data and task types. data_t pairs an unsigned long id with an array
278  using data_t = std::pair<unsigned long, p_teca_variant_array>;
279  using task_t = std::packaged_task<data_t()>;
281  using p_queue_t = std::shared_ptr<queue_t>;
282 
283 
 // stores copies of the path, file, and variable strings and the id
284  read_variable(const std::string &path, const std::string &file,
285  unsigned long id, const std::string &variable) : m_path(path),
286  m_file(file), m_variable(variable), m_id(id)
287  {}
288 
 // bundles an id and an array into a data_t. the array defaults
 // to nullptr
289  static
290  data_t package(unsigned long id,
291  p_teca_variant_array var = nullptr)
292  {
293  return std::make_pair(id, var);
294  }
295 
 // NOTE(review): declared only. presumably this performs the read and
 // returns the packaged result, confirm against the implementation.
296  data_t operator()();
297 
298 private:
299  std::string m_path;
300  std::string m_file;
301  std::string m_variable;
302  unsigned long m_id;
303 };
304 
305 // write the attributes in array_atts to the variable identified by var_id the
306 // name is used in error messages. returns zero if successful.
// NOTE(review): no name parameter is visible in this declaration, the
// reference to "the name" above may be stale. confirm against the
// implementation.
307 int write_variable_attributes(netcdf_handle &fh, int var_id,
308  teca_metadata &array_atts);
309 
310 }
311 #endif
teca_metadata
Definition: teca_metadata.h:17
teca_netcdf_util::netcdf_handle
Definition: teca_netcdf_util.h:116
teca_thread_pool
Definition: teca_thread_pool.h:33
teca_netcdf_util::cpp_tt
Definition: teca_netcdf_util.h:60
teca_netcdf_util::read_variable_and_attributes
Definition: teca_netcdf_util.h:232
teca_netcdf_util::read_variable
Definition: teca_netcdf_util.h:275
teca_netcdf_util::netcdf_tt
Definition: teca_netcdf_util.h:59