xml_unpack_base.h
Go to the documentation of this file.
1 /*
2  @copyright Russell Standish 2000-2013
3  @author Russell Standish
4  This file is part of Classdesc
5 
6  Open source licensed under the MIT license. See LICENSE for details.
7 */
8 
13 #ifndef XML_UNPACK_BASE_H
14 #define XML_UNPACK_BASE_H
#include <algorithm>
#include <map>
#include <iostream>
#include <sstream>
#include <fstream>
#include <limits>
#include <cstdlib>
#include <cctype>
22 
23 #include "xml_common.h"
24 #include "classdesc.h"
25 // for xml_unpack_t serialisation support
26 #include "pack_base.h"
27 #include "pack_stl.h"
28 
29 namespace classdesc_access
30 {
31  template <class T> struct access_pack;
32  template <class T> struct access_unpack;
33 }
34 
35 namespace classdesc
36 {
37 
38  namespace
39  {
/// @return true if @a s is non-empty and every character of it is
/// whitespace; an empty string is not considered whitespace.
inline bool isspace(const std::string& s)
{
  if (s.empty()) return false;
  for (std::string::size_type i=0; i<s.size(); ++i)
    // std::isspace is only defined for values representable as unsigned
    // char (or EOF), so a plain char must be converted before the call
    if (!std::isspace(static_cast<unsigned char>(s[i])))
      return false;
  return true;
}
49  }
50 
/// Whitespace predicate taking a plain char, suitable for passing to
/// std::remove_if over the characters of a std::string.
inline bool Isspace(char c)
{
  // convert via unsigned char: std::isspace is undefined for negative
  // arguments other than EOF
  return std::isspace(static_cast<unsigned char>(c))!=0;
}
53 
54 #ifdef _CLASSDESC
55 #pragma omit pack classdesc::XMLtoken
56 #pragma omit pack classdesc::xml_pack_error
57 #pragma omit unpack classdesc::XMLtoken
58 #pragma omit unpack classdesc::xml_pack_error
59 #pragma omit xml_pack classdesc::XMLtoken
60 #pragma omit xml_pack classdesc::xml_pack_error
61 #pragma omit xml_unpack classdesc::XMLtoken
62 #pragma omit xml_unpack classdesc::xml_pack_error
63 #pragma omit json_pack classdesc::xml_pack_error
64 #pragma omit json_unpack classdesc::xml_pack_error
65 #pragma omit dump classdesc::xml_pack_error
66 #endif
67 
68  class xml_pack_error : public exception
69  {
70  std::string msg;
71  public:
72  xml_pack_error(const char *s): msg("xml_pack:") {msg+=s;}
73  xml_pack_error(std::string s): msg("xml_pack:") {msg+=s;}
74  virtual ~xml_pack_error() throw() {}
75  virtual const char* what() const throw() {return msg.c_str();}
76  };
77 
// Character accessor functions used by the tokeniser; the std::istream
// and FILE* overloads are defined here.

/// read one character from @a i into @a c
/// @return true if the stream is still good() after the read
inline bool get(std::istream& i, char& c)
{
  i.get(c);
  return i.good();
}

/// read one character from @a i into @a c (c is assigned even at EOF)
/// @return false if fgetc reported EOF
inline bool get(FILE*& i, char& c)
{
  const int ch=fgetc(i);
  c=char(ch);
  return ch!=EOF;
}

/// push @a c back onto the stream @a i
inline void unget(std::istream& i, char c)
{
  i.putback(c);
}

/// push @a c back onto the file @a i
inline void unget(FILE*& i, char c)
{
  ungetc(c,i);
}
84 
85  template <class Stream>
86  class XMLtoken
87  {
88  Stream& i;
89  char nexttok;
90 
91  // basic I/O operations
92  bool get(char& c) {return classdesc::get(i,c);}
93  void unget(char c) {classdesc::unget(i,c);}
95  char getNoEOF() {
96  char r;
97  if (!get(r)) throw xml_pack_error("invalid XML");
98  return r;
99  }
100 
101  void gobble_comment();
102  void gobble_whitespace() {
103  char c;
104  bool notEof=get(c);
105  while (notEof && std::isspace(c)) notEof=get(c);
106  if (notEof) unget(c);
107  }
108  char parse_entity();
109  std::string retval(char c, const std::string& tok);
110  public:
111  XMLtoken(Stream& i): i(i), nexttok('\0') {}
112  std::string token();
113  std::string tokenNoEOF() {
114  std::string tok=token();
115  if (tok.empty()) throw xml_pack_error("XML token expected");
116  else return tok;
117  }
118  };
119 
120  template <class Stream>
122  {
123  int level=1;
124  bool inString=false;
125  char c;
126  while (level)
127  {
128  c=getNoEOF();
129  if (c=='"') inString=!inString;
130  if (inString) continue;
131  switch(c)
132  {
133  case '<': level++; break;
134  case '>': level--; break;
135  }
136  }
137  gobble_whitespace();
138  }
139 
140  template <class Stream>
142  {
143  std::string name;
144  char c;
145  for (c=getNoEOF(); c!=';'; c=getNoEOF())
146  name+=c;
147  if (name=="amp") return '&';
148  if (name=="lt") return '<';
149  if (name=="gt") return '>';
150  if (name=="quot") return '"';
151  if (name=="apos") return '\'';
152  const char* cname=name.c_str();
153  if (cname[0]=='#') //character code supplied
154  {
155  if (cname[1]=='x') //is hex
156  {
157  //TODO - should we be doing this all in wide chars?
158  long r=std::strtol(cname+2,NULL,16);
159  if (r>std::numeric_limits<char>::max() || r<std::numeric_limits<char>::min())
160  throw xml_pack_error("XML numeric character reference out of range");
161  return char(r);
162  }
163  else
164  {
165  //TODO - should we be doing this all in wide chars?
166  long r=std::strtol(cname+1,NULL,10);
167  if (r>std::numeric_limits<char>::max() || r<std::numeric_limits<char>::min())
168  throw xml_pack_error("XML numeric character reference out of range");
169  return char(r);
170  }
171  }
172  // not sure what to do about user defined entities - throw, or issue a warning
173  throw xml_pack_error("Unidentified entity encountered");
174  }
175 
176  // This allows a previous token to be return when a single character token in parsed
177  template <class Stream>
178  std::string XMLtoken<Stream>::retval(char c, const std::string& tok)
179  {
180  if (tok.empty())
181  {
182  nexttok='\0';
183  switch (c)
184  {
185  case '/': return "</";
186  case '\\': return "/>";
187  default: return std::string(1,c);
188  }
189  }
190  else
191  {
192  nexttok=c;
193  return tok;
194  }
195  }
196 
197  template <class Stream>
198  std::string XMLtoken<Stream>::token()
199  {
200  std::string tok;
201  char c;
202 
203  // handle any tokens left over from previous parsing
204  if (nexttok)
205  return retval(nexttok,tok);
206 
207  while (get(c))
208  {
209  // return white space as a separate token
210  if (std::isspace(c)) return retval(c,tok);
211 
212  switch (c)
213  {
214  case '&':
215  tok+=parse_entity();
216  continue;
217  case '\'':
218  case '"': //process string literal as single token
219  {
220  char term=c;
221  while ((c=getNoEOF())!=term)
222  if (c=='&')
223  tok+=parse_entity();
224  else
225  tok+=c;
226  return tok;
227  }
228  case '<':
229  c=getNoEOF();
230  switch (c)
231  {
232  case '?':
233  case '!': //we have a comment or XML declaration, which we ignore
234  gobble_comment(); continue;
235  case '/': //we have begin end tag token
236  return retval('/',tok);
237  default:
238  {
239  unget(c);
240  return retval('<',tok);
241  }
242  }
243  case '/':
244  if ((c=getNoEOF())=='>') //we have end empty tag token
245  return retval('\\',tok);
246  else //TODO is a / in the middle of a token acceptible XML?
247  {
248  tok+='/';
249  unget(c);
250  break;
251  }
252  case '>':
253  case '=':
254  return retval(c,tok);
255  default:
256  tok+=c;
257  }
258  }
259  if (tok.empty())
260  return tok; //empty token returned on end of file
261  else
262  throw xml_pack_error("XML file truncated?");
263  }
264 
269  {
270  public:
271  typedef std::map<std::string,std::string> ContentMap;
272  private:
273  ContentMap contentMap;
274  std::map<std::string,unsigned> tokenCount;
275 
276  void checkKey(const std::string& key)
277  {
278  if (missingException && !contentMap.count(key))
279  throw xml_pack_error(key+" is missing in XML data stream");
280  }
281 
282  // add "#0" to components if no # label present
283  std::string addHashNoughts(const std::string& key)
284  {
285  std::string r;
286  std::string::size_type start=0, end;
287  bool hash_read=false;
288  for (end=0; end<=key.length(); end++)
289  if (key[end]=='#')
290  hash_read=true;
291  else if (key[end]=='.')
292  {
293  if (hash_read)
294  hash_read=false;
295  else // no hash read, so insert "#0"
296  {
297  r+=key.substr(start,end-start)+"#0";
298  start=end;
299  }
300  }
301  r+=key.substr(start,end-start);
302  if (!hash_read)
303  r+="#0";
304  return r;
305  }
306 
307  friend struct classdesc_access::access_pack<xml_unpack_t>;
308  friend struct classdesc_access::access_unpack<xml_unpack_t>;
309  public:
313  xml_unpack_t(): missingException(false) {}
314  xml_unpack_t(const char* fname): missingException(false) {std::ifstream i(fname); parse(i);}
315  template <class Stream> xml_unpack_t(Stream& i): missingException(false) {parse(i);}
316  template <class Stream> void process_attribute(XMLtoken<Stream>& i, const std::string& scope);
317  template <class Stream> void parse(Stream& i);
318  template <class Stream> void parse(XMLtoken<Stream>& stream, const std::string& scope);
319 
321  ContentMap::const_iterator firstToken(const std::string& prefix) const {
322  return contentMap.lower_bound(prefix);
323  }
324  ContentMap::const_iterator endToken(const std::string& prefix) const {
325  return contentMap.upper_bound(prefix);
326  }
327 
329  void printContentMap() const {
330  for (std::map<std::string,std::string>::const_iterator i=contentMap.begin();
331  i!=contentMap.end(); i++)
332  std::cout << "["<<i->first<<"]="<<i->second<<std::endl;
333  std::cout << std::endl;
334  for (std::map<std::string,unsigned>::const_iterator i=tokenCount.begin();
335  i!=tokenCount.end(); i++)
336  std::cout << "Count["<<i->first<<"]="<<i->second<<std::endl;
337  }
339  template <class T> void unpack(std::string key, T& var) {
340  key=addHashNoughts(key); checkKey(key);
341  std::map<std::string,std::string>::const_iterator it=contentMap.find(key);
342  if (it != contentMap.end()) {
343  std::istringstream s(it->second);
344  s>>var;
345  }
346  }
347  // specialisation to handle boolean values
348  void unpack(std::string key, bool& var) {
349  key=addHashNoughts(key); checkKey(key);
350  std::map<std::string,std::string>::const_iterator it=contentMap.find(key);
351  if (it != contentMap.end())
352  {
353  std::string val=it->second;
354  // strip any white space
355  val.erase(remove_if(val.begin(), val.end(), Isspace), val.end());
356  for (size_t i=0; i<val.length(); ++i) val[i]=char(tolower(val[i]));
357  var = val=="1" || val=="t" || val=="true"|| val=="y"|| val=="yes" ||
358  val=="on";
359  }
360  }
362  void unpack(std::string key, std::string& var) {
363  key=addHashNoughts(key); checkKey(key);
364  std::map<std::string,std::string>::const_iterator it=contentMap.find(key);
365  if (it != contentMap.end())
366  var=it->second;
367  }
369  bool exists(const std::string& key) {return count(key)>0;}
371  size_t count(std::string key) {
372  key=addHashNoughts(key);
373  key=key.substr(0,key.rfind('#')); //strip final # marker
374  return tokenCount[key];
375  }
376  void clear() {contentMap.clear(); tokenCount.clear();}
377  };
378 
382  template <class Stream>
383  void xml_unpack_t::process_attribute(XMLtoken<Stream>& stream, const std::string& scope)
384  {
385  std::string tok;
386  while (isspace(tok=stream.tokenNoEOF()));
387  if (tok!="=") throw xml_pack_error("ill-formed attribute");
388  while (isspace(tok=stream.tokenNoEOF()));
389  contentMap[scope]=tok;
390  }
391 
396  template <class Stream>
397  void xml_unpack_t::parse(Stream& i)
398  {
399  XMLtoken<Stream> stream(i);
400  std::string tok;
401  while (isspace(tok=stream.token()));
402  if (tok.empty()) return;
403  if (tok=="<")
404  parse(stream,stream.tokenNoEOF());
405  else
406  throw xml_pack_error("no root element found");
407  }
408 
409  template <class Stream>
410  void xml_unpack_t::parse(XMLtoken<Stream>& stream, const std::string& scope)
411  {
412  //count the number of times this token has been read, and append this to database key
413  std::string scope_idx=idx(scope,tokenCount[scope]++);
414 
415  std::string tok;
416  //parse attributes
417  for (tok=stream.tokenNoEOF(); tok!=">" && tok!="/>"; tok=stream.tokenNoEOF())
418  if (!isspace(tok)) process_attribute(stream, scope_idx+"."+tok);
419 
420  if (tok=="/>") return;
421 
422  //parse content. We assume element is either just content, or just has child elements
423  std::string content;
424  for (tok=stream.tokenNoEOF(); tok!="</"; tok=stream.tokenNoEOF())
425  if (tok=="<")
426  parse(stream,scope_idx+"."+stream.tokenNoEOF()); //parse child element
427  else
428  content+=tok;
429 
430  if (content.size())
431  contentMap[scope_idx]=content; //override content (to handle masked private members)
432 
433  // finish parsing end tag
434  tok=stream.tokenNoEOF();
435  if (scope.length()-scope.rfind(tok)!=tok.length()) //tok matches last part of scope
436  throw xml_pack_error("unexpected end tag");
437  for (; tok!=">"; tok=stream.tokenNoEOF()); //skip rest of end tag
438  }
439 
440 
441 }
442 
443 namespace classdesc_access
444 {
445  template <class T> struct access_xml_unpack;
446 }
447 
448 template <class T> void xml_unpack(classdesc::xml_unpack_t&,const classdesc::string&,T&);
449 
450 template <class T> classdesc::xml_unpack_t& operator>>(classdesc::xml_unpack_t& t, T& a);
451 
452  /*
453  base type implementations
454  */
455 namespace classdesc
456 {
457  template <class T>
458  void xml_unpack_onbase(xml_unpack_t& x,const string& d,T& a)
459  {::xml_unpack(x,d+basename<T>(),a);}
460 
461  template <class T>
462  typename enable_if<is_fundamental<T>, void>::T
463  xml_unpackp(xml_unpack_t& x,const string& d,T& a)
464  {x.unpack(d,a);}
465 }
466 
467 using classdesc::xml_unpack_onbase;
468 
469 /* now define the array version */
470 #include <stdarg.h>
471 
472  template <class T> void xml_unpack(classdesc::xml_unpack_t& x,const classdesc::string& d,classdesc::is_array ia,
473  T& a, int dims,size_t ncopies,...)
474  {
475  va_list ap;
476  va_start(ap,ncopies);
477  for (int i=1; i<dims; i++) ncopies*=va_arg(ap,int); //assume that 2 and higher D arrays dimensions are int
478  va_end(ap);
479 
480  classdesc::string eName=classdesc::typeName<T>().c_str();
481  // strip leading namespace and qualifiers
482  const char *e=eName.c_str()+eName.length();
483  while (e!=eName.c_str() && *(e-1)!=' ' && *(e-1)!=':') e--;
484 
485  for (size_t i=0; i<ncopies; i++)
486  xml_unpack(x,classdesc::idx(d+"."+e,i),(&a)[i]);
487  }
488 
489 //Enum_handles have reference semantics
490 template <class T> void xml_unpack(classdesc::xml_unpack_t& x,
491  const classdesc::string& d,
493 {
494  std::string tmp;
495  xml_unpack(x,d,tmp);
496  // remove extraneous white space
497  int (*isspace)(int)=std::isspace;
498  std::string::iterator end=std::remove_if(tmp.begin(),tmp.end(),isspace);
499  arg=tmp.substr(0,end-tmp.begin());
500 }
501 
502 template <class T1, class T2>
503 void xml_unpack(classdesc::xml_unpack_t& x, const classdesc::string& d,
504  std::pair<T1,T2>& arg)
505 {
506  xml_unpack(x,d+".first",arg.first);
507  xml_unpack(x,d+".second",arg.second);
508 }
509 
510 namespace classdesc
511 {
512  template <class T> typename
513  enable_if<is_sequence<T>, void>::T
514  xml_unpackp(xml_unpack_t& x, const string& d, T& arg, dummy<1> dum=0)
515  {
516  string eName=typeName<typename T::value_type>().c_str();
517  eName=eName.substr(0,eName.find('<')); //trim off any template args
518  // strip leading namespace and qualifiers
519  const char *e=eName.c_str()+eName.length();
520  while (e!=eName.c_str() && *(e-1)!=' ' && *(e-1)!=':') e--;
521 
522  arg.clear();
523  for (size_t i=0; i<x.count(d+"."+e); ++i)
524  {
525  typename T::value_type v;
526  ::xml_unpack(x,classdesc::idx(d+"."+e,i),v);
527  arg.push_back(v);
528  }
529  }
530 
531  template <class T> typename
533  xml_unpackp(xml_unpack_t& x, const string& d, T& arg, dummy<2> dum=0)
534  {
535  string eName=typeName<typename T::value_type>().c_str();
536  eName=eName.substr(0,eName.find('<')); //trim off any template args
537  // strip leading namespace and qualifiers
538  const char *e=eName.c_str()+eName.length();
539  while (e!=eName.c_str() && *(e-1)!=' ' && *(e-1)!=':') e--;
540 
541  arg.clear();
542  string prefix=d.empty()? e: d+"."+e;
543  for (size_t i=0; i<x.count(prefix); ++i)
544  {
545  typename NonConstKeyValueType<typename T::value_type>::T v;
546  ::xml_unpack(x,idx(prefix,i),v);
547  arg.insert(v);
548  }
549  }
550 }
551 
552 /* member functions */
553 template<class C, class T>
554 void xml_unpack(classdesc::xml_unpack_t& targ, const classdesc::string& desc, C& c, T arg) {}
555 
556 template<class T>
557 void xml_unpack(classdesc::xml_unpack_t& targ, const classdesc::string& desc,
558  classdesc::is_const_static i, T arg)
559 {}
560 
561 template<class T, class U>
562 void xml_unpack(classdesc::xml_unpack_t& targ, const classdesc::string& desc,
563  classdesc::is_const_static i, const T&, U) {}
564 
565 template<class T>
566 void xml_unpack(classdesc::xml_unpack_t& targ, const classdesc::string& desc,
568 
569 namespace classdesc
570 {
571  template<class T>
572  void xml_unpack(xml_unpack_t& targ, const string& desc, is_graphnode, T&)
573  {
574  throw exception("xml_unpack of arbitrary graphs not supported");
575  }
576 
577 
578 }
579 #endif
void unpack(std::string key, std::string &var)
string deserialisation
Definition: xml_unpack_base.h:362
Definition: xml_unpack_base.h:86
serialisation descriptor
Definition: classdesc.h:631
Definition: classdesc.h:623
class to allow access to private members
Definition: classdesc_access.h:21
Definition: classdesc.h:626
Definition: xml_unpack_base.h:268
class to allow access to private members
Definition: classdesc_access.h:22
Definition: xml_unpack_base.h:68
ContentMap::const_iterator firstToken(const std::string &prefix) const
first token starting with prefix
Definition: xml_unpack_base.h:321
Definition: classdesc.h:588
void unpack(std::string key, T &var)
simple data type deserialisation
Definition: xml_unpack_base.h:339
std::string idx(const std::string &prefix, size_t i)
utility for generating index keys (for use with arrays)
Definition: xml_common.h:14
serialisation for standard containers
void printContentMap() const
dump XML contents for debugging
Definition: xml_unpack_base.h:329
Contains definitions related to classdesc functionality.
Definition: arrays.h:2514
controlled template specialisation: stolen from boost::enable_if.
Definition: classdesc.h:249
Definition: classdesc.h:266
Contains access_* structs, and nothing else. These structs are used to gain access to private members...
Definition: accessor.h:55
base class for exceptions thrown by classdesc
Definition: classdesc.h:366
size_t count(std::string key)
returns number of array elements with prefix key
Definition: xml_unpack_base.h:371
Definition: classdesc.h:704
bool missingException
Definition: xml_unpack_base.h:312
void unpack(unpack_t &targ, const string &desc, is_treenode dum, T *&arg)
unserialise a tree.
Definition: pack_graph.h:44
bool exists(const std::string &key)
checks for existence of token unpacked from XML stream
Definition: xml_unpack_base.h:369