shorten.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
  18. #include <ctype.h>
  19. #include <errno.h>
  20. #include <limits.h>
  21. #include <stdio.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #define MAX_LINE_LEN 16384
  25. #define IFNDEF "#ifndef"
  26. #define IFNDEF_LEN (sizeof(IFNDEF) - 1)
  27. enum parse_state {
  28. PARSE_IFNDEF = 0,
  29. PARSE_STRUCTS_AND_ENUMS,
  30. PARSE_MESSAGES,
  31. PARSE_DONE,
  32. };
  33. #define PROTOBUF_C_END_DECLS_STR "PROTOBUF_C_END_DECLS"
  34. static const char *PARSE_STATE_TERMINATORS[] = {
  35. "PROTOBUF_C_BEGIN_DECLS",
  36. "/* --- messages --- */",
  37. PROTOBUF_C_END_DECLS_STR
  38. };
  39. static const char *MESSAGE_SUFFIXES[] = {
  40. "__INIT",
  41. "__get_packed_size",
  42. "__pack",
  43. "__pack_to_buffer",
  44. "__unpack",
  45. "__free_unpacked",
  46. };
  47. #define NUM_MESSAGE_SUFFIXES \
  48. (sizeof(MESSAGE_SUFFIXES) / sizeof(MESSAGE_SUFFIXES[0]))
  49. static void add_word(char ***words, size_t *num_words, const char *word)
  50. {
  51. size_t new_num_words;
  52. char *nword;
  53. char **nwords;
  54. new_num_words = *num_words + 1;
  55. nword = strdup(word);
  56. if (!nword) {
  57. fprintf(stderr, "failed to allocate memory for %Zd words\n",
  58. new_num_words);
  59. exit(1);
  60. }
  61. nwords = realloc(*words, sizeof(char **) * new_num_words);
  62. if (!nwords) {
  63. fprintf(stderr, "failed to allocate memory for %Zd words\n",
  64. new_num_words);
  65. free(nword);
  66. exit(1);
  67. }
  68. nwords[new_num_words - 1] = nword;
  69. *num_words = new_num_words;
  70. *words = nwords;
  71. }
  72. static int has_suffix(const char *str, const char *suffix)
  73. {
  74. int str_len = strlen(str);
  75. int suffix_len = strlen(suffix);
  76. if (str_len < suffix_len)
  77. return 0;
  78. return strcmp(str + str_len - suffix_len, suffix) == 0;
  79. }
  80. static int has_message_suffix(const char *word)
  81. {
  82. size_t i = 0;
  83. for (i = 0; i < NUM_MESSAGE_SUFFIXES; i++) {
  84. if (has_suffix(word, MESSAGE_SUFFIXES[i]))
  85. return 1;
  86. }
  87. return 0;
  88. }
  89. static void add_words(char ***words, size_t *num_words,
  90. char *line, enum parse_state state)
  91. {
  92. char *word, *ptr = NULL;
  93. for (word = strtok_r(line, " ", &ptr); word;
  94. word = strtok_r(NULL, " ", &ptr)) {
  95. if (word[0] == '_')
  96. continue;
  97. if (!strstr(word, "__"))
  98. continue;
  99. if ((state == PARSE_MESSAGES) && (!has_message_suffix(word)))
  100. continue;
  101. add_word(words, num_words, word);
  102. }
  103. }
  104. static int compare_strings(const void *a, const void *b)
  105. {
  106. return strcmp(*(char * const*)a, *(char * const*)b);
  107. }
  108. static char *get_last_occurrence(char *haystack, const char *needle)
  109. {
  110. char *val = NULL, *nval;
  111. int needle_len = strlen(needle);
  112. while (1) {
  113. nval = strstr(haystack, needle);
  114. if (!nval)
  115. return val;
  116. val = nval + needle_len;
  117. haystack = nval + needle_len;
  118. }
  119. }
  120. static char *get_second_last_occurrence(char *haystack, const char *needle)
  121. {
  122. char *pval = NULL, *val = NULL, *nval;
  123. int needle_len = strlen(needle);
  124. while (1) {
  125. nval = strstr(haystack, needle);
  126. if (!nval)
  127. return pval;
  128. pval = val;
  129. val = nval + needle_len;
  130. haystack = nval + needle_len;
  131. }
  132. }
  133. static int has_camel_case(const char *str)
  134. {
  135. int i, prev_lower = 0;
  136. for (i = 0; str[i]; i++) {
  137. if (isupper(str[i])) {
  138. if (prev_lower)
  139. return 1;
  140. } else if (islower(str[i])) {
  141. prev_lower = 1;
  142. }
  143. }
  144. return 0;
  145. }
  146. static char *get_shortened_occurrence(char *str)
  147. {
  148. char *last, *slast;
  149. last = get_last_occurrence(str, "__");
  150. slast = get_second_last_occurrence(str, "__");
  151. last = get_last_occurrence(str, "__");
  152. if (!last)
  153. return NULL;
  154. if ((!has_message_suffix(str)) &&
  155. (strstr(last, "_") || has_camel_case(last))) {
  156. // Heuristic: if the last bit of the string after the double underscore
  157. // has another underscore inside, or has mixed case, we assume it's
  158. // complex enough to use on its own.
  159. return last;
  160. }
  161. // Otherwise, we grab the part of the string after the second-last double
  162. // underscore.
  163. slast = get_second_last_occurrence(str, "__");
  164. return slast ? slast : last;
  165. }
  166. static int output_shortening_macros(char **words, size_t num_words,
  167. const char *out_path, FILE *out)
  168. {
  169. size_t i;
  170. const char *prev_word = "";
  171. const char *shortened;
  172. for (i = 0; i < num_words; i++) {
  173. if (strcmp(prev_word, words[i]) == 0) {
  174. // skip words we've already done
  175. continue;
  176. }
  177. prev_word = words[i];
  178. shortened = get_shortened_occurrence(words[i]);
  179. if (shortened) {
  180. if (fprintf(out, "#define %s %s\n", shortened, words[i]) < 0) {
  181. fprintf(stderr, "error writing to %s\n", out_path);
  182. return EIO;
  183. }
  184. }
  185. }
  186. return 0;
  187. }
  188. /**
  189. * Remove newlines from a buffer.
  190. *
  191. * @param line The buffer.
  192. */
  193. static void chomp(char *line)
  194. {
  195. while (1) {
  196. int len = strlen(line);
  197. if (len == 0) {
  198. return;
  199. }
  200. if (line[len - 1] != '\n') {
  201. return;
  202. }
  203. line[len - 1] = '\0';
  204. }
  205. }
  206. /**
  207. * Remove most non-alphanumeric characters from a buffer.
  208. *
  209. * @param line The buffer.
  210. */
  211. static void asciify(char *line)
  212. {
  213. int i;
  214. for (i = 0; line[i]; i++) {
  215. if ((!isalnum(line[i])) && (line[i] != '_') && (line[i] != '#')) {
  216. line[i] = ' ';
  217. }
  218. }
  219. }
  220. static const char *base_name(const char *path)
  221. {
  222. const char *base;
  223. base = rindex(path, '/');
  224. if (!base)
  225. return NULL;
  226. return base + 1;
  227. }
  228. static int process_file_lines(const char *in_path, const char *out_path,
  229. FILE *in, FILE *out, char ***words, size_t *num_words)
  230. {
  231. int ret;
  232. char header_guard[MAX_LINE_LEN] = { 0 };
  233. char line[MAX_LINE_LEN] = { 0 };
  234. const char *base = base_name(in_path);
  235. enum parse_state state = PARSE_IFNDEF;
  236. if (!base) {
  237. fprintf(stderr, "failed to get basename of %s\n", in_path);
  238. return EINVAL;
  239. }
  240. while (1) {
  241. if (!fgets(line, MAX_LINE_LEN - 1, in)) {
  242. if (ferror(in)) {
  243. ret = errno;
  244. fprintf(stderr, "error reading %s: %s (%d)\n",
  245. in_path, strerror(ret), ret);
  246. return ret;
  247. }
  248. fprintf(stderr, "error reading %s: didn't find "
  249. PROTOBUF_C_END_DECLS_STR, in_path);
  250. return EINVAL;
  251. }
  252. if (strstr(line, PARSE_STATE_TERMINATORS[state])) {
  253. state = state + 1;
  254. if (state == PARSE_DONE) {
  255. break;
  256. }
  257. continue;
  258. }
  259. chomp(line);
  260. asciify(line);
  261. switch (state) {
  262. case PARSE_IFNDEF:
  263. if (strncmp(line, IFNDEF, IFNDEF_LEN) == 0) {
  264. strcpy(header_guard, line + IFNDEF_LEN + 1);
  265. }
  266. break;
  267. default:
  268. add_words(words, num_words, line, state);
  269. break;
  270. }
  271. }
  272. if (!header_guard[0]) {
  273. fprintf(stderr, "failed to find header guard for %s\n", in_path);
  274. return EINVAL;
  275. }
  276. qsort(*words, *num_words, sizeof(char*), compare_strings);
  277. fprintf(out, "#ifndef %s_S\n", header_guard);
  278. fprintf(out, "#define %s_S\n\n", header_guard);
  279. fprintf(out, "#include \"%s\"\n\n", base);
  280. ret = output_shortening_macros(*words, *num_words, out_path, out);
  281. if (ret)
  282. return ret;
  283. fprintf(out, "\n#endif\n");
  284. return 0;
  285. }
  286. static int process_file(const char *in_path)
  287. {
  288. char out_path[PATH_MAX] = { 0 };
  289. int res, ret = 0;
  290. FILE *in = NULL, *out = NULL;
  291. char **words = NULL;
  292. size_t num_words = 0;
  293. size_t i;
  294. res = snprintf(out_path, PATH_MAX, "%s.s", in_path);
  295. if ((res < 0) || (res >= PATH_MAX)) {
  296. fprintf(stderr, "snprintf error for %s\n", in_path);
  297. ret = EINVAL;
  298. goto done;
  299. }
  300. in = fopen(in_path, "r");
  301. if (!in) {
  302. ret = errno;
  303. fprintf(stderr, "failed to open %s for read: error %s (%d)\n",
  304. in_path, strerror(ret), ret);
  305. goto done;
  306. }
  307. out = fopen(out_path, "w");
  308. if (!out) {
  309. ret = errno;
  310. fprintf(stderr, "failed to open %s for write: error %s (%d)\n",
  311. out_path, strerror(ret), ret);
  312. goto done;
  313. }
  314. ret = process_file_lines(in_path, out_path, in, out, &words, &num_words);
  315. for (i = 0; i < num_words; i++) {
  316. free(words[i]);
  317. }
  318. free(words);
  319. if (ret) {
  320. goto done;
  321. }
  322. if (fclose(out)) {
  323. ret = errno;
  324. perror("fclose error");
  325. }
  326. out = NULL;
  327. done:
  328. if (in) {
  329. fclose(in);
  330. }
  331. if (out) {
  332. fclose(out);
  333. }
  334. return ret;
  335. }
  336. static void usage(void)
  337. {
  338. fprintf(stderr,
  339. "shorten: creates header files with shorter definitions for protobuf-c\n"
  340. "definitions. Output files will be written to the same paths as input\n"
  341. "files, but with a .s extension tacked on.\n"
  342. "\n"
  343. "usage: shorten [paths-to-headers]\n");
  344. }
  345. int main(int argc, char **argv)
  346. {
  347. int i, ret, nproc = 0, rval = EXIT_SUCCESS;
  348. if (argc < 2) {
  349. usage();
  350. exit(EXIT_SUCCESS);
  351. }
  352. for (i = 1; i < argc; i++) {
  353. ret = process_file(argv[i]);
  354. if (ret) {
  355. fprintf(stderr, "error processing %s\n", argv[i]);
  356. rval = EXIT_FAILURE;
  357. } else {
  358. nproc++;
  359. }
  360. }
  361. //fprintf(stderr, "successfully processed %d files\n", nproc);
  362. return rval;
  363. }
// vim: ts=4:sw=4:tw=79:et