cachedDBM.h
Go to the documentation of this file.
1 /*
2  @copyright Russell Standish 2000-2013
3  @author Russell Standish
4  This file is part of EcoLab
5 
6  Open source licensed under the MIT license. See LICENSE for details.
7 */
8 
12 #ifndef CACHEDBM_H
13 #define CACHEDBM_H
14 
15 #include "pack_base.h"
16 #include "classdesc_access.h"
17 //#include "pack_stl.h"
18 //#include "error.h"
19 #include "TCL_obj_base.h"
20 #include "TCL_obj_stl.h"
21 #include "omp_rw_lock.h"
22 
23 #include <string.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <fcntl.h>
29 #include <unistd.h>
30 #include <utility>
31 #include <limits>
32 
33 namespace ecolab
34 {
35  class Datum: public classdesc::xdr_pack
36  {
37  public:
39  // void assign_ptr(void *d, size_t sz) {Realloc(data,0); data=(char*)d; size=sz;}
40  void copy_ptr(void *d, size_t sz) {packraw((char*)d,sz);}
42  Datum(const Datum& x) {packraw(x.data(),x.size());}
43  Datum& operator<<(const Datum& x) {packraw(x.data(),x.size()); return *this;}
44  const Datum& operator>>(Datum& x) {x.packraw(data(),size()); return *this;}
45  template <class T>
46  Datum& operator<<(const T& x) {pack(*this,"",const_cast<T&>(x)); return *this;}
47  template <class T>
48  Datum& operator>>(T& x) {unpack(*this,"",x); return *this;}
49  Datum& operator=(const Datum& x) {reseti(); packraw(x.data(),x.size()); return *this;}
50  template <class T> Datum& operator=(const T& x){reseti(); pack(*this,"",const_cast<T&>(x)); return *this;}
51  template <class T> operator T() {T x; reseto(); unpack(*this,"",x); return x;}
52  };
53 
54 }
55 
56 // must be in global namespace due to a classdesc bug
57 class Db
58 {
59  void *db; // opaque pointer to database object (be it ndbm or Berkley)
60  void *dbc; // opaque pointer to database cursor (BDB)
61  Db(const Db& x);
62  void operator=(const Db& x);
63 public:
64  enum rw {read,write};
65 
66  void open(const char* filename,rw readwrite);
67  void close();
68  void flush();
69  bool fetch(const ecolab::Datum& key, ecolab::Datum& val) const;
70  void store(const ecolab::Datum& key, const ecolab::Datum& val);
71  void del(const ecolab::Datum& key);
73  bool firstkey(ecolab::Datum& key, ecolab::Datum& val) const;
74  bool nextkey(ecolab::Datum& key, ecolab::Datum& val) const;
75  bool opened() const {return db!=NULL;}
76  Db(const char* f,rw rdwr): db(NULL), dbc(NULL) {open(f,rdwr);}
77  ~Db() {close();}
78 };
79 
80 namespace ecolab
81 {
82  template <class key, class val> struct base_map: public std::map<key,val>
83  {
84  void set(const key& k, Datum& v)
85  {v>>std::map<key,val>::operator[](k);}
86  val& get(const key& k) {return std::map<key,val>::operator[](k);}
87  //virtual val& operator[] (const key& k)=0; //used to access cachedDBM::operator[] in TCL_obj()
88  virtual ~base_map() {}
89  };
90 
/// Strict weak ordering on pairs that compares the second members only.
template <class U, class V>
struct sortPair
{
  /// true if x.second orders before y.second.
  /// Declared const so that the comparator can be invoked through a const
  /// object or reference, which containers and algorithms are entitled to do.
  bool operator()(const std::pair<U,V>& x, const std::pair<U,V>& y) const
  {return x.second<y.second;}
};
96 
/// Set of ints that retains at most `limit` elements, namely the largest
/// ones inserted so far.
class limited_set: public std::set<int>
{
  size_t limit;
public:
  /// @param limit maximum number of elements retained
  limited_set(size_t limit): limit(limit) {}
  /// Offer x to the set. While there is room x is simply inserted; once the
  /// set is full, x replaces the current minimum if it is larger than it.
  void insert(int x) {
    if (limit==0) return; // nothing may be retained; begin() must not be touched
    if (size()<limit)
      {
        std::set<int>::insert(x);
        return;
      }
    // Full. Evict the minimum only when x was really added, so that a
    // duplicate does not shrink the set.
    if (x>*begin() && std::set<int>::insert(x).second)
      erase(begin());
  }
};
108 
109  /* make the main class a base class in order to derive a special case for
110  strings */
112  template<class key, class val>
113  class cachedDBM_base : protected base_map<key,val>
114  {
115  typedef base_map<key,val> Base;
116  shared_ptr<Db> db;
117  bool readonly;
118  classdesc::string filename;
119  typedef std::map<key,size_t> TSMap;
120  TSMap timestamp;
121  size_t ts;
122  RWlock rwl;
123  mutable bool last;
124  public:
125  size_t max_elem; /* limit number of elements to this value */
126  cachedDBM_base(): ts(1), max_elem(std::numeric_limits<int>::max()),
127  keys(*this) {}
128  void init(const char *fname, char mode='w')
129  {
130  db.reset(new Db((char*)fname, (mode=='w')? Db::write: Db::read));
131  readonly=mode=='r';
132  if (!db->opened()) throw error("DBM file %s open failed",fname);
133  filename=fname;
134  }
135  void Init(TCL_args args) {
136  char *fname=args, *mode=args;
137  init(fname,mode[0]);
138  }
139  // void init(const char *fname, char *mode="w") {init(fname,mode[0]);}
140  void close() {if (db) {commit(); db->close(); db.reset();} clear();}
141  ~cachedDBM_base() {close();}
142  bool opened() const {return bool(db);}
143  bool load(const key& k)
144  {
145  Datum dk, vv;
146  dk<<k;
147  write_lock w(rwl);
148  if (db && !db->fetch(dk,vv))
149  {
150  this->set(k,vv);
151  timestamp[k] = ts++;
152  return true;
153  }
154  return false;
155  }
156 
158  bool key_exists(const key& k) const
159  {
160  if (this->count(k)) return true;
161  if (db)
162  return const_cast<cachedDBM_base*>(this)->load(k);
163  return false;
164  }
165  val& operator[] (const key& k)
166  {
167  read_lock r(rwl);
168  if (Base::size()>=max_elem) {commit();} /* do a simple purge of database */
169  if (!key_exists(k)) //read data into memory if it exists
170  {
171  write_lock w(rwl);
172  Base::insert(std::make_pair(k,val()));
173  timestamp[k]=ts++;
174  }
175 #ifdef _OPENMP
176 #pragma omp atomic
177 #endif
178  ts++;
179  timestamp[k]=ts; //not so important if we pick up a simultaneous timestamp
180  return Base::get(k);
181  }
182 
186  val elem(TCL_args args)
187  {
188  if (args.count>=2)
189  return operator[](args[0].get<key>())=args[1].get<val>();
190  else if (args.count==1)
191  return operator[](args[0].get<key>());
192  else
193  return val();
194  }
195 
197  size_t cacheSize() const {return Base::size();}
198 
200  void clear() {Base::clear();}
201 
203  void commit()
204  {
205  write_lock w(rwl);
206  typename Base::iterator i, j;
207  Datum k, v;
208  size_t sum_ts=0;
209  for (typename TSMap::const_iterator t=timestamp.begin(); t!=timestamp.end(); ++t)
210  sum_ts+=t->second;
211  size_t cut_time=sum_ts/2;
212  limited_set ls(cut_time);
213  for (i=Base::begin(); i!=Base::end(); )
214  {
215  k=i->first; v=i->second;
216  if (db)
217  {
218  if (readonly && db->fetch(k,v))
219  {
220  ++i;
221  continue; // do not erase element not in database
222  }
223  else if (!readonly && v.data())
224  db->store(k,v);
225  }
226 
227  typename TSMap::iterator ts=timestamp.find(i->first);
228  j=i; i++; //save current iter for later erase
229  // erase map element if timestamp earlier than cut_time.
230  if (ts->second<cut_time)
231  {
232  Base::erase(j);
233  timestamp.erase(ts);
234  }
235  }
236  if (db) db->flush();
237  // rebase timestamps to cut_time
238  ts-=cut_time;
239  for (typename TSMap::iterator t=timestamp.begin(); t!=timestamp.end(); ++t)
240  t->second-=cut_time;
241 
242  }
243 
245  void del(key k)
246  {
247  if (db && db->opened())
248  {
249  write_lock w(rwl);
250  Datum dk;
251  dk=k;
252  db->del(dk);
253  }
254  Base::erase(k);
255  }
261  key firstkey() const
262  {
263  key k;
264  if (db)
265  {
266  Datum kk,vv;
267  if (!(last=db->firstkey(kk,vv)))
268  kk>>k;
269  }
270  return k;
271  }
273  key nextkey() const
274  {
275  key k;
276  if (db)
277  {
278  Datum kk,vv;
279  if (!(last=db->nextkey(kk,vv)))
280  kk>>k;
281  }
282  return k;
283  }
285  bool eof() const {return last || !db;}
286  void pack(classdesc::pack_t& b) {
287  if (opened()) {
288  commit();
289  ::pack(b,"",filename);
290  ::pack(b,"",readonly);
291  ::pack(b,"",static_cast<Base&>(*this));
292  } else {
293  classdesc::string nullstring;
294  ::pack(b,"",nullstring);
295  ::pack(b,"",static_cast<Base&>(*this));
296  }
297  }
298  void unpack(classdesc::pack_t& b) {
299  close();
300  classdesc::string fname;
301  bool readonly;
302  ::unpack(b,"",filename);
303  if (filename!="") {
304  ::unpack(b,"",readonly);
305  init(filename.c_str(),readonly? 'r': 'w');
306  }
307  ::unpack(b,"",static_cast<Base&>(*this));
308  }
309 
312  {
313  shared_ptr<Db> db;
314  std::pair<key,val> keyValue;
315  void getKV(bool (Db::*op)(Datum&, Datum&) const)
316  {
317  Datum k,v;
318  if ((db.get()->*op)(k,v))
319  db.reset();
320  else
321  {
322  k>>keyValue.first;
323  v>>keyValue.second;
324  }
325  }
326  public:
330  KeyValueIterator(const string& fname): db(new Db(fname.c_str(), Db::read))
331  {getKV(&Db::firstkey);}
332  KeyValueIterator& operator++() {getKV(&Db::nextkey); return *this;}
333  // iterator comparison is undefined when referring to different
334  // databases, and keys are unique within a given database, so we
335  // can use comparisons of keys
336  bool operator==(const KeyValueIterator& x) const
337  // TODO check whether operator< should be used here???
338  {return (!db && !x.db) || (db && x.db && keyValue.first==x.keyValue.first);}
339  bool operator!=(const KeyValueIterator& x) const
340  {return !operator==(x);}
341  const std::pair<key, val> operator*() const {return keyValue;}
342  const std::pair<key, val>* operator->() const {return &keyValue;}
343  };
344 
345  KeyValueIterator begin() const {return KeyValueIterator(filename);}
346  KeyValueIterator begin()
347  {commit(); return KeyValueIterator(filename);}
348  KeyValueIterator end() const {return KeyValueIterator();}
349 
351  {
352  public:
353  KeyIterator() {}
354  KeyIterator(const string& fname): KeyValueIterator(fname) {}
355  const key operator*() const {return KeyValueIterator::operator*().first;}
356  const key* operator->() const
357  {return &KeyValueIterator::operator*().first;}
358  };
359 
360  struct Keys
361  {
362  cachedDBM_base& _this;
363  Keys(cachedDBM_base& _this): _this(_this) {}
364  KeyIterator begin() const {return KeyIterator(_this.filename);}
365  KeyIterator begin()
366  {_this.commit(); return KeyIterator(_this.filename);}
367  KeyIterator end() const {return KeyIterator();}
368  };
370  Keys keys;
371 
372  };
373 
375  template<class key, class val>
376  class cachedDBM: public cachedDBM_base<key,val>
377  {
378  public:
379  cachedDBM(){}
380  cachedDBM(const char* f, char mode='w'){this->init(f,mode);}
381  };
382 
383  /*
384  specialisations to handle char * cases
385  */
386 
/// std::string that converts implicitly to const char* and can be
/// constructed and assigned from const char*. A null pointer is treated as
/// the empty string instead of being handed to std::string.
struct cachedDBM_string: public std::string
{
  /// view of the contents as a C string; valid as long as c_str() is
  operator const char*() const {return c_str();}
  /// assign from a C string; null empties the string
  cachedDBM_string& operator=(const char* s)
  {
    if (s)
      std::string::operator=(s);
    else
      clear();
    return *this;
  }
  cachedDBM_string() {}
  /// construct from a C string; null gives an empty string
  cachedDBM_string(const char*x): std::string(x? x: "") {}
};
395 
396  template<class val>
397  class cachedDBM<char *,val>: public cachedDBM_base<std::string,val>
398  {
399  public:
400  cachedDBM(){}
401  cachedDBM(const char* f, char mode='w'){this->init(f,mode);}
402  };
403 
404  template<class key>
406  {
407  public:
408  cachedDBM(){}
409  cachedDBM(const char* f, char mode='w'){this->init(f,mode);}
410  };
411 
412  template <>
414  {
415  public:
416  cachedDBM(){}
417  cachedDBM(const char* f, char mode='w'){init(f,mode);}
418  };
419 
420 #ifdef _CLASSDESC
421 #pragma omit pack ecolab::Datum
422 #pragma omit unpack ecolab::Datum
423 #pragma omit pack ecolab::cachedDBM
424 #pragma omit unpack ecolab::cachedDBM
425 #pragma omit pack ecolab::cachedDBM_base
426 #pragma omit unpack ecolab::cachedDBM_base
427 
428 #pragma omit pack ecolab::Db
429 #pragma omit unpack ecolab::Db
430 #pragma omit pack Db
431 #pragma omit unpack Db
432 
433 #endif
434 }
435 
436 namespace classdesc_access
437 {
438  namespace cd=classdesc;
439 
440  template <>
441  struct access_pack<ecolab::Datum>
442  {
443  void operator()(cd::pack_t& t, const cd::string& d, const ecolab::Datum& x)
444  {pack(t,"",x.size()); t.packraw(x.data(),x.size());}
445  };
446 
447  template <>
448  struct access_unpack<ecolab::Datum>
449  {
450  void operator()(cd::pack_t& t, const cd::string& d, ecolab::Datum& x)
451  {
452  size_t size; unpack(t,"",size);
453  x.packraw(t.data()+t.pos(), size);
454  t.seeko(size);
455  }
456  void operator()(classdesc::pack_t& t, const classdesc::string& d, const ecolab::Datum& x1)
457  {
458  ecolab::Datum x(x1);
459  size_t size; unpack(t,"",size);
460  x.packraw(t.data()+t.pos(), size);
461  t.seeko(size);
462  }
463  };
464 
465  template <class K, class V>
466  struct access_pack<ecolab::cachedDBM<K,V> >
467  {
468  template <class U>
469  void operator()(classdesc::pack_t& b,const classdesc::string& d, U& a)
470  {a.pack(b);}
471  };
472 
473  template <class K, class V>
474  struct access_unpack<ecolab::cachedDBM<K,V> >
475  {
476  template <class U>
477  void operator()(classdesc::unpack_t& b,const classdesc::string& d,U& a)
478  {a.unpack(b);}
479  };
480 }
481 
482 #ifdef _CLASSDESC
483 #pragma omit TCL_obj ecolab::base_map
484 #endif
485 #include "cachedDBM.cd"
486 #endif
487 
488 
descriptor access to a class's privates
Definition: omp_rw_lock.h:87
Definition: omp_rw_lock.h:92
bool firstkey(ecolab::Datum &key, ecolab::Datum &val) const
KeyValueIterator()
initialises to an end() iterator
Definition: cachedDBM.h:328
Definition: cachedDBM.h:35
bool key_exists(const key &k) const
returns true if key is in data base or added with [] operator
Definition: cachedDBM.h:158
Definition: cachedDBM.h:387
EcoLab exception class.
Definition: error.h:25
iterator type for iterating over keys
Definition: cachedDBM.h:311
Definition: cachedDBM.h:57
serialisation descriptor
size_t size() const
size of buffer
Definition: TCL_obj_base.h:154
Definition: cachedDBM.h:82
void copy_ptr(void *d, size_t sz)
Definition: cachedDBM.h:40
class to allow access to private members
Definition: classdesc_access.h:21
TCL_obj support for STL containers.
A read/write lock pattern for OpenMP.
Definition: cachedDBM.h:360
const char * data() const
actual buffer
Definition: TCL_obj_base.h:152
val elem(TCL_args args)
Definition: cachedDBM.h:186
class to allow access to private members
Definition: classdesc_access.h:22
#define CLASSDESC_ACCESS(type)
add friend statements for each accessor function
Definition: classdesc_access.h:36
KeyValueIterator(const string &fname)
initialises to a begin() iterator of database fname
Definition: cachedDBM.h:330
Definition: cachedDBM.h:97
void commit()
write any changes out to the file, and clear some of the cache
Definition: cachedDBM.h:203
Represent arguments to TCL commands.
Definition: TCL_obj_base.h:138
Keys keys
access an iterator range of keys [keys.begin()...keys.end())
Definition: cachedDBM.h:370
TCL access descriptor.
Definition: omp_rw_lock.h:86
void clear()
clear the cache
Definition: cachedDBM.h:200
bool eof() const
true if no further keys remain when iterating
Definition: cachedDBM.h:285
persistent map
Definition: cachedDBM.h:376
const char * data() const
actual buffer
Definition: pack_base.h:152
Definition: cachedDBM.h:92
implementation of cacheDBM common to all specialisations
Definition: cachedDBM.h:113
Contains definitions related to classdesc functionality.
Definition: arrays.h:2514
Definition: pack_base.h:124
Definition: TCL_obj_base.h:124
_OPENMP
Definition: accessor.h:16
size_t cacheSize() const
number of elements in cache
Definition: cachedDBM.h:197
size_t pos() const
position of read pointer
Definition: pack_base.h:155
key nextkey() const
advance to next key in database
Definition: cachedDBM.h:273
key firstkey() const
obtain first key for iteration through database.
Definition: cachedDBM.h:261
Contains access_* structs, and nothing else. These structs are used to gain access to private members...
Definition: accessor.h:55
Definition: cachedDBM.h:350
void del(key k)
delete entry associated with key k
Definition: cachedDBM.h:245