xmlarchive.hh 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #ifndef XMLARCHIVE_HH_
  19. #define XMLARCHIVE_HH_
  20. #include <xercesc/parsers/SAXParser.hpp>
  21. #include <xercesc/util/PlatformUtils.hpp>
  22. #include <xercesc/util/BinInputStream.hpp>
  23. #include <xercesc/sax/HandlerBase.hpp>
  24. #include <xercesc/sax/InputSource.hpp>
  25. #include "recordio.hh"
  26. XERCES_CPP_NAMESPACE_USE
  27. namespace hadoop {
  /**
   * A single typed token: a type tag plus the character data accumulated
   * for it. The type tag is fixed at construction; the text starts out
   * empty and only ever grows through addChars().
   */
  class Value {
  public:
    /** Creates a value with type tag t and empty text. */
    Value(const std::string& t) : type(t), value() {}

    /**
     * Appends exactly len characters starting at buf to the text.
     * The data is taken by length, so embedded NUL characters are kept.
     */
    void addChars(const char* buf, unsigned int len) {
      value.append(buf, len);
    }

    /** Returns the type tag given at construction. */
    const std::string& getType() const { return type; }

    /** Returns all text appended so far. */
    const std::string& getValue() const { return value; }

  private:
    std::string type;
    std::string value;
  };
  40. class MySAXHandler : public HandlerBase {
  41. private:
  42. std::vector<Value>& vlist;
  43. bool charsValid;
  44. public:
  45. MySAXHandler(std::vector<Value>& list) : vlist(list) {charsValid = false;}
  46. void startElement(const XMLCh* const name, AttributeList& attr);
  47. void endElement(const XMLCh* const name);
  48. void characters(const XMLCh* const buf, unsigned int len);
  49. };
  50. class XmlIndex : public Index {
  51. private:
  52. std::vector<Value>& vlist;
  53. unsigned int& vidx;
  54. public:
  55. XmlIndex(std::vector<Value>& list, unsigned int& idx) : vlist(list), vidx(idx) {}
  56. bool done() {
  57. Value v = vlist[vidx];
  58. return (v.getType() == "/array") ? true : false;
  59. }
  60. void incr() {}
  61. ~XmlIndex() {}
  62. };
  63. class MyBinInputStream : public BinInputStream {
  64. private:
  65. InStream& stream;
  66. unsigned int pos;
  67. public:
  68. MyBinInputStream(InStream& s) : stream(s) { pos = 0; }
  69. virtual unsigned int curPos() const { return pos; }
  70. virtual unsigned int readBytes(XMLByte* const toFill,
  71. const unsigned int maxToRead) {
  72. ssize_t nread = stream.read(toFill, maxToRead);
  73. if (nread < 0) {
  74. return 0;
  75. } else {
  76. pos += nread;
  77. return nread;
  78. }
  79. }
  80. };
  81. class MyInputSource : public InputSource {
  82. private:
  83. InStream& stream;
  84. public:
  85. MyInputSource(InStream& s) : stream(s) { }
  86. virtual BinInputStream* makeStream() const {
  87. return new MyBinInputStream(stream);
  88. }
  89. virtual const XMLCh* getEncoding() const {
  90. return XMLString::transcode("UTF-8");
  91. }
  92. virtual ~MyInputSource() {}
  93. };
  94. class IXmlArchive : public IArchive {
  95. private:
  96. std::vector<Value> vlist;
  97. unsigned int vidx;
  98. MySAXHandler *docHandler;
  99. SAXParser *parser;
  100. MyInputSource* src;
  101. Value next() {
  102. Value v = vlist[vidx];
  103. vidx++;
  104. return v;
  105. }
  106. public:
  107. IXmlArchive(InStream& _stream) {
  108. vidx = 0;
  109. try {
  110. XMLPlatformUtils::Initialize();
  111. } catch (const XMLException& e) {
  112. throw new IOException("Unable to initialize XML Parser.");
  113. }
  114. parser = new SAXParser();
  115. docHandler = new MySAXHandler(vlist);
  116. parser->setDocumentHandler(docHandler);
  117. src = new MyInputSource(_stream);
  118. try {
  119. parser->parse(*src);
  120. } catch (const XMLException& e) {
  121. throw new IOException("Unable to parse XML stream.");
  122. } catch (const SAXParseException& e) {
  123. throw new IOException("Unable to parse XML stream.");
  124. }
  125. delete parser;
  126. delete docHandler;
  127. }
  128. virtual void deserialize(int8_t& t, const char* tag);
  129. virtual void deserialize(bool& t, const char* tag);
  130. virtual void deserialize(int32_t& t, const char* tag);
  131. virtual void deserialize(int64_t& t, const char* tag);
  132. virtual void deserialize(float& t, const char* tag);
  133. virtual void deserialize(double& t, const char* tag);
  134. virtual void deserialize(std::string& t, const char* tag);
  135. virtual void deserialize(std::string& t, size_t& len, const char* tag);
  136. virtual void startRecord(Record& s, const char* tag);
  137. virtual void endRecord(Record& s, const char* tag);
  138. virtual Index* startVector(const char* tag);
  139. virtual void endVector(Index* idx, const char* tag);
  140. virtual Index* startMap(const char* tag);
  141. virtual void endMap(Index* idx, const char* tag);
  142. virtual ~IXmlArchive() {
  143. XMLPlatformUtils::Terminate();
  144. }
  145. };
  146. class OXmlArchive : public OArchive {
  147. private:
  148. OutStream& stream;
  149. std::vector<std::string> cstack;
  150. void insideRecord(const char* tag) {
  151. printBeginEnvelope(tag);
  152. cstack.push_back("record");
  153. }
  154. void outsideRecord(const char* tag) {
  155. std::string s = cstack.back();
  156. cstack.pop_back();
  157. if (s != "record") {
  158. throw new IOException("Error deserializing record.");
  159. }
  160. printEndEnvelope(tag);
  161. }
  162. void insideVector(const char* tag) {
  163. printBeginEnvelope(tag);
  164. cstack.push_back("vector");
  165. }
  166. void outsideVector(const char* tag) {
  167. std::string s = cstack.back();
  168. cstack.pop_back();
  169. if (s != "vector") {
  170. throw new IOException("Error deserializing vector.");
  171. }
  172. printEndEnvelope(tag);
  173. }
  174. void insideMap(const char* tag) {
  175. printBeginEnvelope(tag);
  176. cstack.push_back("map");
  177. }
  178. void outsideMap(const char* tag) {
  179. std::string s = cstack.back();
  180. cstack.pop_back();
  181. if (s != "map") {
  182. throw new IOException("Error deserializing map.");
  183. }
  184. printEndEnvelope(tag);
  185. }
  186. void p(const char* cstr) {
  187. stream.write(cstr, strlen(cstr));
  188. }
  189. void printBeginEnvelope(const char* tag) {
  190. if (cstack.size() != 0) {
  191. std::string s = cstack.back();
  192. if ("record" == s) {
  193. p("<member>\n");
  194. p("<name>");
  195. p(tag);
  196. p("</name>\n");
  197. p("<value>");
  198. } else if ("vector" == s) {
  199. p("<value>");
  200. } else if ("map" == s) {
  201. p("<value>");
  202. }
  203. } else {
  204. p("<value>");
  205. }
  206. }
  207. void printEndEnvelope(const char* tag) {
  208. if (cstack.size() != 0) {
  209. std::string s = cstack.back();
  210. if ("record" == s) {
  211. p("</value>\n");
  212. p("</member>\n");
  213. } else if ("vector" == s) {
  214. p("</value>\n");
  215. } else if ("map" == s) {
  216. p("</value>\n");
  217. }
  218. } else {
  219. p("</value>\n");
  220. }
  221. }
  222. public:
  223. OXmlArchive(OutStream& _stream) : stream(_stream) {}
  224. virtual void serialize(int8_t t, const char* tag);
  225. virtual void serialize(bool t, const char* tag);
  226. virtual void serialize(int32_t t, const char* tag);
  227. virtual void serialize(int64_t t, const char* tag);
  228. virtual void serialize(float t, const char* tag);
  229. virtual void serialize(double t, const char* tag);
  230. virtual void serialize(const std::string& t, const char* tag);
  231. virtual void serialize(const std::string& t, size_t len, const char* tag);
  232. virtual void startRecord(const Record& s, const char* tag);
  233. virtual void endRecord(const Record& s, const char* tag);
  234. virtual void startVector(size_t len, const char* tag);
  235. virtual void endVector(size_t len, const char* tag);
  236. virtual void startMap(size_t len, const char* tag);
  237. virtual void endMap(size_t len, const char* tag);
  238. virtual ~OXmlArchive();
  239. };
  240. }
  241. #endif /*XMLARCHIVE_HH_*/