TestPrimitives.cc 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include "test_commons.h"
  19. TEST(Primitives, fmemcmp) {
  20. std::vector<std::string> vs;
  21. char buff[14];
  22. vs.push_back("");
  23. for (uint32_t i = 0; i < 5000; i += 7) {
  24. snprintf(buff, 14, "%d", i * 31);
  25. vs.push_back(buff);
  26. snprintf(buff, 10, "%010d", i);
  27. vs.push_back(buff);
  28. }
  29. for (size_t i = 0; i < vs.size(); i++) {
  30. for (size_t j = 0; j < vs.size(); j++) {
  31. std::string & ls = vs[i];
  32. std::string & rs = vs[j];
  33. size_t m = std::min(ls.length(), rs.length());
  34. int c = memcmp(ls.c_str(), rs.c_str(), m);
  35. int t = fmemcmp(ls.c_str(), rs.c_str(), m);
  36. if (!((c == 0 && t == 0) || (c > 0 && t > 0) || (c < 0 && t < 0))) {
  37. ASSERT_TRUE(false);
  38. }
  39. }
  40. }
  41. }
  /**
   * Benchmark workload for the C library memcmp(): fills a 2048-byte buffer
   * with the byte values 0..255 repeated, shuffles it, and then performs eight
   * short comparisons (5 to 19 bytes) per iteration.
   *
   * @param iterations number of loop iterations; the default is the count the
   *                   Perf test runs with
   * @return the (wrapped) sum of all comparison results; it has no meaning
   *         beyond giving the caller a value that depends on every call
   */
  static int test_memcmp(uint32_t iterations = 100000000) {
    uint8_t buff[2048];
    for (uint32_t i = 0; i < 2048; i++) {
      buff[i] = i & 0xff;
    }
    // NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
    // C++17; kept here because the file shows no C++11 usage to build on.
    std::random_shuffle(buff, buff + 2048);
    // Accumulate in unsigned arithmetic: memcmp() only guarantees the sign of
    // its result, so the magnitude may be large and hundreds of millions of
    // additions into an int could overflow, which is undefined behaviour.
    uint32_t sum = 0;
    for (uint32_t i = 0; i < iterations; i++) {
      const uint32_t offset = i % 1000;
      sum += static_cast<uint32_t>(memcmp(buff, buff + 1024, 5));
      sum += static_cast<uint32_t>(memcmp(buff + offset, buff + 1124, 9));
      sum += static_cast<uint32_t>(memcmp(buff + offset, buff + 1224, 10));
      sum += static_cast<uint32_t>(memcmp(buff + offset, buff + 1324, 15));
      sum += static_cast<uint32_t>(memcmp(buff + offset, buff + 1424, 16));
      sum += static_cast<uint32_t>(memcmp(buff + offset, buff + 1524, 17));
      sum += static_cast<uint32_t>(memcmp(buff + offset, buff + 1624, 18));
      sum += static_cast<uint32_t>(memcmp(buff + offset, buff + 1724, 19));
    }
    return static_cast<int>(sum);
  }
  62. static int test_fmemcmp() {
  63. char buff[2048];
  64. for (uint32_t i = 0; i < 2048; i++) {
  65. buff[i] = i & 0xff;
  66. }
  67. std::random_shuffle(buff, buff + 2048);
  68. int r = 0;
  69. for (uint32_t i = 0; i < 100000000; i++) {
  70. int offset = i % 1000;
  71. r += fmemcmp(buff, buff + 1024, 5);
  72. r += fmemcmp(buff + offset, buff + 1124, 9);
  73. r += fmemcmp(buff + offset, buff + 1224, 10);
  74. r += fmemcmp(buff + offset, buff + 1324, 15);
  75. r += fmemcmp(buff + offset, buff + 1424, 16);
  76. r += fmemcmp(buff + offset, buff + 1524, 17);
  77. r += fmemcmp(buff + offset, buff + 1624, 18);
  78. r += fmemcmp(buff + offset, buff + 1724, 19);
  79. }
  80. return r;
  81. }
  82. TEST(Perf, fmemcmp) {
  83. Timer t;
  84. int a = test_memcmp();
  85. LOG("%s", t.getInterval(" memcmp ").c_str());
  86. t.reset();
  87. int b = test_fmemcmp();
  88. LOG("%s", t.getInterval(" fmemcmp ").c_str());
  89. // prevent compiler optimization
  90. TestConfig.setInt("tempvalue", a + b);
  91. }
  /**
   * memcpy() workload: each round copies `len` bytes from `dest` into `src`
   * and then `len` bytes from `src` back into `dest`.
   *
   * @param src  first buffer, at least `len` bytes
   * @param dest second buffer, at least `len` bytes, not overlapping `src`
   * @param len  number of bytes moved by each memcpy() call
   * @param time number of rounds to run
   */
  static void test_memcpy_perf_len(char * src, char * dest, size_t len, size_t time) {
    for (size_t roundsLeft = time; roundsLeft != 0; --roundsLeft) {
      memcpy(src, dest, len);
      memcpy(dest, src, len);
    }
  }
  98. static void test_simple_memcpy_perf_len(char * src, char * dest, size_t len, size_t time) {
  99. for (size_t i = 0; i < time; i++) {
  100. simple_memcpy(src, dest, len);
  101. simple_memcpy(dest, src, len);
  102. }
  103. }
  104. TEST(Perf, simple_memcpy_small) {
  105. char * src = new char[10240];
  106. char * dest = new char[10240];
  107. char buff[32];
  108. for (size_t len = 1; len < 256; len = len + 2) {
  109. LOG("------------------------------");
  110. snprintf(buff, 32, " memcpy %luB\t", len);
  111. Timer t;
  112. test_memcpy_perf_len(src, dest, len, 1000000);
  113. LOG("%s", t.getInterval(buff).c_str());
  114. snprintf(buff, 32, "simple_memcpy %luB\t", len);
  115. t.reset();
  116. test_simple_memcpy_perf_len(src, dest, len, 1000000);
  117. LOG("%s", t.getInterval(buff).c_str());
  118. }
  119. delete[] src;
  120. delete[] dest;
  121. }
  /**
   * Linear search for the first byte equal to `ch` in p[0, len). The main
   * loop is unrolled by hand to examine three bytes per pass; a plain
   * byte-by-byte loop handles the remainder.
   *
   * @param p   start of the buffer
   * @param ch  byte value to look for
   * @param len number of bytes to search
   * @return pointer to the first match, or NULL when `ch` does not occur
   */
  inline char * memchrbrf4(char * p, char ch, size_t len) {
    char * cur = p;
    char * const end = p + len;
    // Unrolled part: keep going while more than three bytes remain.
    while (end - cur > 3) {
      if (cur[0] == ch) {
        return cur;
      }
      if (cur[1] == ch) {
        return cur + 1;
      }
      if (cur[2] == ch) {
        return cur + 2;
      }
      cur += 3;
    }
    // Remainder: at most three bytes, checked one at a time.
    while (cur < end) {
      if (*cur == ch) {
        return cur;
      }
      ++cur;
    }
    return NULL;
  }
  /**
   * Linear search for the first byte equal to `ch` in p[0, len), examining
   * two bytes per loop pass.
   *
   * The loop must cover the whole range: a bound of `len / 2` combined with a
   * stride of 2 stops about halfway through the buffer and misses every match
   * in the second half.
   *
   * @param p   start of the buffer
   * @param ch  byte value to look for
   * @param len number of bytes to search
   * @return pointer to the first match, or NULL when `ch` does not occur
   */
  inline char * memchrbrf2(char * p, char ch, size_t len) {
    size_t i = 0;
    // Pairs: runs while at least two unread bytes remain.
    for (; i + 1 < len; i += 2) {
      if (p[i] == ch) {
        return p + i;
      }
      if (p[i + 1] == ch) {
        return p + i + 1;
      }
    }
    // An odd length leaves exactly one trailing byte to check.
    if (i < len && p[i] == ch) {
      return p + i;
    }
    return NULL;
  }
  /**
   * Finds the first byte equal to `c` in s[0, len).
   *
   * With __X64 defined the search is done by inline assembly around the
   * PCMPESTRI instruction (emitted as raw bytes). It loads 16 bytes at a time
   * with movdqu, so it may read beyond s + len; the caller has to make sure
   * that memory is readable (the memchr Perf test over-allocates its buffer
   * by 16 bytes).
   * Not safe on Mac OS X (segmentation fault); should be safe on Linux
   * without mmap.
   *
   * Without __X64 the assembly is compiled out. Returning the untouched
   * initial value 0 would look like "match at offset 0" to the caller, so a
   * memchr()-based fallback supplies the same contract instead.
   *
   * @param s   start of the buffer
   * @param c   byte value to look for (only the low byte is compared)
   * @param len number of bytes to search
   * @return index of the first match, or -1 when there is none
   */
  inline int memchr_sse(const char *s, int c, int len) {
    //len : edx; c: esi; s:rdi
    int index = 0;
  #ifdef __X64
    __asm__ __volatile__(
        //"and $0xff, %%esi;" //clear upper bytes
        "movd %%esi, %%xmm1;"
        "mov $1, %%eax;"
        "add $16, %%edx;"
        "mov %%rdi ,%%r8;"
        "1:"
        "movdqu (%%rdi), %%xmm2;"
        "sub $16, %%edx;"
        "addq $16, %%rdi;"
        //"pcmpestri $0x0, %%xmm2,%%xmm1;"
        ".byte 0x66 ,0x0f ,0x3a ,0x61 ,0xca ,0x00;"
        //"lea 16(%%rdi), %%rdi;"
        "ja 1b;"//Res2==0:no match and zflag==0: s is not end
        "jc 3f;"//Res2==1: match and s is not end
        "mov $0xffffffff, %%eax;"//no match
        "jmp 0f;"
        "3:"
        "sub %%r8, %%rdi;"
        "lea -16(%%edi,%%ecx),%%eax;"
        "0:"
        // "mov %%eax, %0;"
        :"=a"(index),"=D"(s),"=S"(c),"=d"(len)
        :"D"(s),"S"(c),"d"(len)
        // xmm1 and xmm2 are written by the code above, so they are declared
        // as clobbered alongside rcx and r8.
        :"rcx","r8","xmm1","xmm2","memory"
    );
  #else
    // Portable fallback: same result contract as the assembly path.
    const void * hit = (len > 0) ? memchr(s, c, static_cast<size_t>(len)) : NULL;
    index = (hit == NULL) ? -1 : static_cast<int>(static_cast<const char *>(hit) - s);
  #endif
    return index;
  }
  190. TEST(Perf, memchr) {
  191. Random r;
  192. int32_t size = 100 * 1024 * 1024;
  193. int32_t lineLength = TestConfig.getInt("memchr.line.length", 100);
  194. char * buff = new char[size + 16];
  195. memset(buff, 'a', size);
  196. for (int i = 0; i < size / lineLength; i++) {
  197. buff[r.next_int32(size)] = '\n';
  198. }
  199. Timer timer;
  200. char * pos = buff;
  201. int count = 0;
  202. while (true) {
  203. if (pos == buff + size) {
  204. break;
  205. }
  206. pos = (char*)memchr(pos, '\n', buff + size - pos);
  207. if (pos == NULL) {
  208. break;
  209. }
  210. pos++;
  211. count++;
  212. }
  213. LOG("%s", timer.getSpeedM2("memchr bytes/lines", size, count).c_str());
  214. timer.reset();
  215. pos = buff;
  216. count = 0;
  217. while (true) {
  218. if (pos == buff + size) {
  219. break;
  220. }
  221. pos = (char*)memchrbrf2(pos, '\n', buff + size - pos);
  222. if (pos == NULL) {
  223. break;
  224. }
  225. pos++;
  226. count++;
  227. }
  228. LOG("%s", timer.getSpeedM2("memchrbrf2 bytes/lines", size, count).c_str());
  229. timer.reset();
  230. pos = buff;
  231. count = 0;
  232. while (true) {
  233. if (pos == buff + size) {
  234. break;
  235. }
  236. pos = (char*)memchrbrf4(pos, '\n', buff + size - pos);
  237. if (pos == NULL) {
  238. break;
  239. }
  240. pos++;
  241. count++;
  242. }
  243. LOG("%s", timer.getSpeedM2("memchrbrf4 bytes/lines", size, count).c_str());
  244. timer.reset();
  245. pos = buff;
  246. count = 0;
  247. while (true) {
  248. if (pos == buff + size) {
  249. break;
  250. }
  251. int ret = memchr_sse(pos, '\n', buff + size - pos);
  252. if (ret == -1) {
  253. break;
  254. }
  255. pos = pos + ret;
  256. pos++;
  257. count++;
  258. }
  259. LOG("%s", timer.getSpeedM2("memchr_sse bytes/lines", size, count).c_str());
  260. delete[] buff;
  261. }
  262. TEST(Perf, memcpy_batch) {
  263. int32_t size = TestConfig.getInt("input.size", 64 * 1024);
  264. size_t mb = TestConfig.getInt("input.mb", 320) * 1024 * 1024UL;
  265. char * src = new char[size];
  266. char * dest = new char[size];
  267. memset(src, 0, size);
  268. memset(dest, 0, size);
  269. Timer t;
  270. for (size_t i = 0; i < mb; i += size) {
  271. memcpy(dest, src, size);
  272. }
  273. LOG("%s", t.getSpeedM("memcpy", mb).c_str());
  274. t.reset();
  275. for (size_t i = 0; i < mb; i += size) {
  276. simple_memcpy(dest, src, size);
  277. }
  278. LOG("%s", t.getSpeedM("simple_memcpy", mb).c_str());
  279. delete[] src;
  280. delete[] dest;
  281. }