xmlarchive.cc 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  #include "xmlarchive.hh"
  #include <stdio.h>
  #include <stdlib.h>
  #include <vector>
  20. using namespace hadoop;
  21. void hadoop::MySAXHandler::startElement(const XMLCh* const name, AttributeList& attr)
  22. {
  23. charsValid = false;
  24. char* qname = XMLString::transcode(name);
  25. if(std::string("boolean") == qname ||
  26. std::string("ex:i1") == qname ||
  27. std::string("i4") == qname ||
  28. std::string("int") == qname ||
  29. std::string("ex:i8") == qname ||
  30. std::string("ex:float") == qname ||
  31. std::string("double") == qname ||
  32. std::string("string") == qname) {
  33. std::string s(qname);
  34. Value v(s);
  35. vlist.push_back(v);
  36. charsValid = true;
  37. } else if(std::string("struct") == qname ||
  38. std::string("array") == qname) {
  39. std::string s(qname);
  40. Value v(s);
  41. vlist.push_back(v);
  42. }
  43. XMLString::release(&qname);
  44. }
  45. void hadoop::MySAXHandler::endElement(const XMLCh* const name)
  46. {
  47. charsValid = false;
  48. char* qname = XMLString::transcode(name);
  49. if(std::string("struct") == qname ||
  50. std::string("array") == qname) {
  51. std::string s = "/";
  52. Value v(s + qname);
  53. vlist.push_back(v);
  54. }
  55. XMLString::release(&qname);
  56. }
  57. void hadoop::MySAXHandler::characters(const XMLCh* const buf, const unsigned int len)
  58. {
  59. if (charsValid) {
  60. char *cstr = XMLString::transcode(buf);
  61. Value& v = vlist.back();
  62. v.addChars(cstr, strlen(cstr));
  63. XMLString::release(&cstr);
  64. }
  65. }
  /** Upper-case hexadecimal digits, indexed by nibble value (0-15). */
  static char hexchars[] = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
  };

  /**
   * Escape a string so it can be written as XML character data.
   *
   * '<' becomes "&lt;", '&' becomes "&amp;", '%' becomes "%0025", and every
   * char that compares below 0x20 becomes "%00" followed by the two
   * upper-case hex digits of its byte value. All other characters are copied
   * unchanged. Note that the comparison is done on plain char, so where char
   * is signed, bytes of 0x80 and above also take the "%00XX" form.
   *
   * @param s text to escape
   * @return the escaped text
   */
  static std::string toXMLString(std::string s)
  {
    std::string escaped;
    for (std::string::size_type pos = 0; pos != s.length(); ++pos) {
      const char ch = s[pos];
      switch (ch) {
        case '<':
          escaped += "&lt;";
          break;
        case '&':
          escaped += "&amp;";
          break;
        case '%':
          // '%' introduces an escape, so it has to be escaped itself.
          escaped += "%0025";
          break;
        default:
          if (ch < 0x20) {
            const unsigned char byte = static_cast<unsigned char>(ch);
            escaped += "%00";
            escaped += hexchars[byte >> 4];
            escaped += hexchars[byte & 0x0F];
          } else {
            escaped += ch;
          }
          break;
      }
    }
    return escaped;
  }
  /**
   * Convert one hexadecimal digit to its numeric value.
   *
   * @param ch a character in '0'-'9', 'a'-'f' or 'A'-'F'
   * @return the digit's value (0-15), or 0 when ch is not a hex digit
   */
  static uint8_t h2b(char ch) {
    // Each range test needs both bounds; with "||" the first test was always
    // true and every character was treated as a decimal digit.
    if ((ch >= '0') && (ch <= '9')) {
      return static_cast<uint8_t>(ch - '0');
    }
    if ((ch >= 'a') && (ch <= 'f')) {
      return static_cast<uint8_t>(ch - 'a' + 10);
    }
    if ((ch >= 'A') && (ch <= 'F')) {
      return static_cast<uint8_t>(ch - 'A' + 10);
    }
    return 0;
  }

  /**
   * Undo the '%' escaping applied to string values.
   *
   * An escape is '%' followed by four characters; the first two are skipped
   * (they are always '0') and the last two are read as the hex digits of one
   * byte. Everything else is copied through unchanged. A '%' that is not
   * followed by four more characters is copied literally rather than read
   * past the end of the input.
   *
   * @param s escaped text
   * @return the decoded text
   */
  static std::string fromXMLString(const std::string& s)
  {
    std::string r;
    const size_t len = s.length();
    r.reserve(len);

    size_t i = 0;
    while (i < len) {
      const char ch = s[i];
      if (ch == '%' && len - i > 4) {
        // ignore the first two characters, which are always '0'
        const uint8_t cnv =
            static_cast<uint8_t>(h2b(s[i + 3]) * 16 + h2b(s[i + 4]));
        r.push_back(static_cast<char>(cnv));
        // Step over the whole escape so the index stays in sync with the
        // position being read.
        i += 5;
      } else {
        r.push_back(ch);
        ++i;
      }
    }
    return r;
  }
  /**
   * Encode the first len bytes of a buffer as upper-case hexadecimal text,
   * two digits per byte.
   *
   * @param s   buffer to encode
   * @param len number of bytes to encode; values larger than s.length() are
   *            clamped so the buffer is never read past its end
   * @return the hexadecimal text (2 * encoded-bytes characters)
   */
  static std::string toXMLBuffer(const std::string& s, size_t len)
  {
    static const char digits[] = "0123456789ABCDEF";

    if (len > s.length()) {
      len = s.length();
    }

    std::string r;
    r.reserve(2 * len);
    for (size_t i = 0; i < len; ++i) {
      const unsigned char b = static_cast<unsigned char>(s[i]);
      r.push_back(digits[b >> 4]);
      r.push_back(digits[b & 0x0F]);
    }
    return r;
  }
  150. static std::string fromXMLBuffer(std::string s, size_t& len)
  151. {
  152. len = s.length();
  153. if (len%2 == 1) { // len is guaranteed to be even
  154. throw new IOException("Errror deserializing buffer.");
  155. }
  156. len = len >> 1;
  157. std::string t;
  158. for (size_t idx = 0; idx < len; idx++) {
  159. char buf[3];
  160. buf[0] = s[2*idx];
  161. buf[1] = s[2*idx+1];
  162. buf[2] = '\0';
  163. int i;
  164. if (1 != sscanf(buf, "%2x", &i)) {
  165. throw new IOException("Errror deserializing buffer.");
  166. }
  167. t.push_back((char) i);
  168. }
  169. len = t.length();
  170. return t;
  171. }
  172. void hadoop::IXmlArchive::deserialize(int8_t& t, const char* tag)
  173. {
  174. Value v = next();
  175. if (v.getType() != "ex:i1") {
  176. throw new IOException("Error deserializing byte");
  177. }
  178. t = (int8_t) strtol(v.getValue().c_str(), NULL, 10);
  179. }
  180. void hadoop::IXmlArchive::deserialize(bool& t, const char* tag)
  181. {
  182. Value v = next();
  183. if (v.getType() != "boolean") {
  184. throw new IOException("Error deserializing boolean");
  185. }
  186. t = (v.getValue() == "1");
  187. }
  188. void hadoop::IXmlArchive::deserialize(int32_t& t, const char* tag)
  189. {
  190. Value v = next();
  191. if (v.getType() != "i4" && v.getType() != "int") {
  192. throw new IOException("Error deserializing int");
  193. }
  194. t = (int32_t) strtol(v.getValue().c_str(), NULL, 10);
  195. }
  196. void hadoop::IXmlArchive::deserialize(int64_t& t, const char* tag)
  197. {
  198. Value v = next();
  199. if (v.getType() != "ex:i8") {
  200. throw new IOException("Error deserializing long");
  201. }
  202. t = strtoll(v.getValue().c_str(), NULL, 10);
  203. }
  204. void hadoop::IXmlArchive::deserialize(float& t, const char* tag)
  205. {
  206. Value v = next();
  207. if (v.getType() != "ex:float") {
  208. throw new IOException("Error deserializing float");
  209. }
  210. t = strtof(v.getValue().c_str(), NULL);
  211. }
  212. void hadoop::IXmlArchive::deserialize(double& t, const char* tag)
  213. {
  214. Value v = next();
  215. if (v.getType() != "double") {
  216. throw new IOException("Error deserializing double");
  217. }
  218. t = strtod(v.getValue().c_str(), NULL);
  219. }
  220. void hadoop::IXmlArchive::deserialize(std::string& t, const char* tag)
  221. {
  222. Value v = next();
  223. if (v.getType() != "string") {
  224. throw new IOException("Error deserializing string");
  225. }
  226. t = fromXMLString(v.getValue());
  227. }
  228. void hadoop::IXmlArchive::deserialize(std::string& t, size_t& len, const char* tag)
  229. {
  230. Value v = next();
  231. if (v.getType() != "string") {
  232. throw new IOException("Error deserializing buffer");
  233. }
  234. t = fromXMLBuffer(v.getValue(), len);
  235. }
  236. void hadoop::IXmlArchive::startRecord(Record& s, const char* tag)
  237. {
  238. Value v = next();
  239. if (v.getType() != "struct") {
  240. throw new IOException("Error deserializing record");
  241. }
  242. }
  243. void hadoop::IXmlArchive::endRecord(Record& s, const char* tag)
  244. {
  245. Value v = next();
  246. if (v.getType() != "/struct") {
  247. throw new IOException("Error deserializing record");
  248. }
  249. }
  250. Index* hadoop::IXmlArchive::startVector(const char* tag)
  251. {
  252. Value v = next();
  253. if (v.getType() != "array") {
  254. throw new IOException("Error deserializing vector");
  255. }
  256. return new XmlIndex(vlist, vidx);
  257. }
  258. void hadoop::IXmlArchive::endVector(Index* idx, const char* tag)
  259. {
  260. Value v = next();
  261. if (v.getType() != "/array") {
  262. throw new IOException("Error deserializing vector");
  263. }
  264. delete idx;
  265. }
  266. Index* hadoop::IXmlArchive::startMap(const char* tag)
  267. {
  268. Value v = next();
  269. if (v.getType() != "array") {
  270. throw new IOException("Error deserializing map");
  271. }
  272. return new XmlIndex(vlist, vidx);
  273. }
  274. void hadoop::IXmlArchive::endMap(Index* idx, const char* tag)
  275. {
  276. Value v = next();
  277. if (v.getType() != "/array") {
  278. throw new IOException("Error deserializing map");
  279. }
  280. delete idx;
  281. }
  282. void hadoop::OXmlArchive::serialize(int8_t t, const char* tag)
  283. {
  284. printBeginEnvelope(tag);
  285. p("<ex:i1>");
  286. char sval[5];
  287. sprintf(sval, "%d", t);
  288. p(sval);
  289. p("</ex:i1>");
  290. printEndEnvelope(tag);
  291. }
  292. void hadoop::OXmlArchive::serialize(bool t, const char* tag)
  293. {
  294. printBeginEnvelope(tag);
  295. p("<boolean>");
  296. p(t ? "1" : "0");
  297. p("</boolean>");
  298. printEndEnvelope(tag);
  299. }
  300. void hadoop::OXmlArchive::serialize(int32_t t, const char* tag)
  301. {
  302. printBeginEnvelope(tag);
  303. p("<i4>");
  304. char sval[128];
  305. sprintf(sval, "%d", t);
  306. p(sval);
  307. p("</i4>");
  308. printEndEnvelope(tag);
  309. }
  310. void hadoop::OXmlArchive::serialize(int64_t t, const char* tag)
  311. {
  312. printBeginEnvelope(tag);
  313. p("<ex:i8>");
  314. char sval[128];
  315. sprintf(sval, "%lld", t);
  316. p(sval);
  317. p("</ex:i8>");
  318. printEndEnvelope(tag);
  319. }
  320. void hadoop::OXmlArchive::serialize(float t, const char* tag)
  321. {
  322. printBeginEnvelope(tag);
  323. p("<ex:float>");
  324. char sval[128];
  325. sprintf(sval, "%f", t);
  326. p(sval);
  327. p("</ex:float>");
  328. printEndEnvelope(tag);
  329. }
  330. void hadoop::OXmlArchive::serialize(double t, const char* tag)
  331. {
  332. printBeginEnvelope(tag);
  333. p("<double>");
  334. char sval[128];
  335. sprintf(sval, "%lf", t);
  336. p(sval);
  337. p("</double>");
  338. printEndEnvelope(tag);
  339. }
  340. void hadoop::OXmlArchive::serialize(const std::string& t, const char* tag)
  341. {
  342. printBeginEnvelope(tag);
  343. p("<string>");
  344. std::string s = toXMLString(t);
  345. stream.write(s.data(), s.length());
  346. p("</string>");
  347. printEndEnvelope(tag);
  348. }
  349. void hadoop::OXmlArchive::serialize(const std::string& t, size_t len, const char* tag)
  350. {
  351. printBeginEnvelope(tag);
  352. p("<string>");
  353. std::string s = toXMLBuffer(t, len);
  354. stream.write(s.data(), s.length());
  355. p("</string>");
  356. printEndEnvelope(tag);
  357. }
  358. void hadoop::OXmlArchive::startRecord(const Record& s, const char* tag)
  359. {
  360. insideRecord(tag);
  361. p("<struct>\n");
  362. }
  363. void hadoop::OXmlArchive::endRecord(const Record& s, const char* tag)
  364. {
  365. p("</struct>\n");
  366. outsideRecord(tag);
  367. }
  368. void hadoop::OXmlArchive::startVector(size_t len, const char* tag)
  369. {
  370. insideVector(tag);
  371. p("<array>\n");
  372. }
  373. void hadoop::OXmlArchive::endVector(size_t len, const char* tag)
  374. {
  375. p("</array>\n");
  376. outsideVector(tag);
  377. }
  378. void hadoop::OXmlArchive::startMap(size_t len, const char* tag)
  379. {
  380. insideMap(tag);
  381. p("<array>\n");
  382. }
  383. void hadoop::OXmlArchive::endMap(size_t len, const char* tag)
  384. {
  385. p("</array>\n");
  386. outsideMap(tag);
  387. }
  388. hadoop::OXmlArchive::~OXmlArchive()
  389. {
  390. }