xxhash.c

/*
*  xxHash - Fast Hash algorithm
*  Copyright (C) 2012-2016, Yann Collet
*
*  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
*
*  Redistribution and use in source and binary forms, with or without
*  modification, are permitted provided that the following conditions are
*  met:
*
*  * Redistributions of source code must retain the above copyright
*  notice, this list of conditions and the following disclaimer.
*  * Redistributions in binary form must reproduce the above
*  copyright notice, this list of conditions and the following disclaimer
*  in the documentation and/or other materials provided with the
*  distribution.
*
*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
*  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
*  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
*  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
*  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
*  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
*  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
*  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
*  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
*  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*  You can contact the author at :
*  - xxHash homepage: http://www.xxhash.com
*  - xxHash source repository : https://github.com/Cyan4973/xxHash
*/

/* *************************************
*  Tuning parameters
***************************************/
/*!XXH_FORCE_MEMORY_ACCESS :
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below allows selecting a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on a compiler extension (i.e. not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method doesn't depend on a compiler extension, but it violates the C standard.
 *            It can generate buggy code on targets which do not support unaligned memory accesses.
 *            In some circumstances, however, it's the only known way to get the best performance (e.g. GCC + ARMv6).
 *            See http://stackoverflow.com/a/32095106/646947 for details.
 * Prefer these methods in priority order (0 > 1 > 2).
 */
#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
                        || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
                        || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define XXH_FORCE_MEMORY_ACCESS 2
#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
                    || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
                    || defined(__ARM_ARCH_7S__) ))
#    define XXH_FORCE_MEMORY_ACCESS 1
#  endif
#endif
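
/* For example (illustrative note, not from the original source), the packed-union
 * method can be requested at build time instead of relying on the detection above:
 *
 *     cc -O3 -DXXH_FORCE_MEMORY_ACCESS=1 -c xxhash.c
 */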

/*!XXH_ACCEPT_NULL_INPUT_POINTER :
 * If the input pointer is NULL, xxHash's default behavior is to dereference it, triggering a segfault.
 * When this macro is enabled, xxHash actively checks the input for a null pointer.
 * If it is NULL, the result is the same as for a zero-length input.
 */
#ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
#  define XXH_ACCEPT_NULL_INPUT_POINTER 0
#endif

/*!XXH_FORCE_ALIGN_CHECK :
 * This is a minor performance trick, only useful with lots of very small keys.
 * It means : check for aligned/unaligned input.
 * The check costs one initial branch per hash;
 * set it to 0 when the input is guaranteed to be aligned,
 * or when alignment doesn't matter for performance.
 */
#ifndef XXH_FORCE_ALIGN_CHECK   /* can be defined externally */
#  if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
#    define XXH_FORCE_ALIGN_CHECK 0
#  else
#    define XXH_FORCE_ALIGN_CHECK 1
#  endif
#endif

/* *************************************
*  Includes & Memory related functions
***************************************/
/*! Modify the local functions below should you wish to use some other memory routines
 *  for malloc(), free() */
#include <stdlib.h>
static void* XXH_malloc(size_t s) { return malloc(s); }
static void  XXH_free  (void* p)  { free(p); }
/*! and for memcpy() */
#include <string.h>
static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }

#include <assert.h>   /* assert */

#define XXH_STATIC_LINKING_ONLY
#include "xxhash.h"

/* *************************************
*  Compiler Specific Options
***************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
#  define XXH_FORCE_INLINE static __forceinline
#  define XXH_NO_INLINE static __declspec(noinline)
#else
#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#    ifdef __GNUC__
#      define XXH_FORCE_INLINE static inline __attribute__((always_inline))
#      define XXH_NO_INLINE static __attribute__((noinline))
#    else
#      define XXH_FORCE_INLINE static inline
#      define XXH_NO_INLINE static
#    endif
#  else
#    define XXH_FORCE_INLINE static
#    define XXH_NO_INLINE static
#  endif /* __STDC_VERSION__ */
#endif

/* *************************************
*  Basic Types
***************************************/
#ifndef MEM_MODULE
# if !defined (__VMS) \
   && (defined (__cplusplus) \
   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
#   include <stdint.h>
    typedef uint8_t  BYTE;
    typedef uint16_t U16;
    typedef uint32_t U32;
# else
    typedef unsigned char  BYTE;
    typedef unsigned short U16;
    typedef unsigned int   U32;
# endif
#endif

/* ===   Memory access   === */

#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))

/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware. */
static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }

#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U32 u32; } __attribute__((packed)) unalign;
static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }

#else

/* portable and safe solution. Generally efficient.
 * see : http://stackoverflow.com/a/32095106/646947
 */
static U32 XXH_read32(const void* memPtr)
{
    U32 val;
    memcpy(&val, memPtr, sizeof(val));
    return val;
}

#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */


/* ===   Endianness   === */
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;

/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
#ifndef XXH_CPU_LITTLE_ENDIAN
static int XXH_isLittleEndian(void)
{
    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
    return one.c[0];
}
#  define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
#endif

/* ****************************************
*  Compiler-specific Functions and Macros
******************************************/
#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
#if defined(_MSC_VER)
#  define XXH_rotl32(x,r) _rotl(x,r)
#  define XXH_rotl64(x,r) _rotl64(x,r)
#else
#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
#endif

#if defined(_MSC_VER)     /* Visual Studio */
#  define XXH_swap32 _byteswap_ulong
#elif XXH_GCC_VERSION >= 403
#  define XXH_swap32 __builtin_bswap32
#else
static U32 XXH_swap32 (U32 x)
{
    return  ((x << 24) & 0xff000000 ) |
            ((x <<  8) & 0x00ff0000 ) |
            ((x >>  8) & 0x0000ff00 ) |
            ((x >> 24) & 0x000000ff );
}
#endif

/* ***************************
*  Memory reads
*****************************/
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;

XXH_FORCE_INLINE U32 XXH_readLE32(const void* ptr)
{
    return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
}

static U32 XXH_readBE32(const void* ptr)
{
    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
}

XXH_FORCE_INLINE U32
XXH_readLE32_align(const void* ptr, XXH_alignment align)
{
    if (align==XXH_unaligned) {
        return XXH_readLE32(ptr);
    } else {
        return XXH_CPU_LITTLE_ENDIAN ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
    }
}


/* *************************************
*  Macros
***************************************/
#define XXH_STATIC_ASSERT(c)  { enum { XXH_sa = 1/(int)(!!(c)) }; }    /* use after variable declarations */
XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }

/* *******************************************************************
*  32-bit hash functions
*********************************************************************/
static const U32 PRIME32_1 = 2654435761U;   /* 0b10011110001101110111100110110001 */
static const U32 PRIME32_2 = 2246822519U;   /* 0b10000101111010111100101001110111 */
static const U32 PRIME32_3 = 3266489917U;   /* 0b11000010101100101010111000111101 */
static const U32 PRIME32_4 =  668265263U;   /* 0b00100111110101001110101100101111 */
static const U32 PRIME32_5 =  374761393U;   /* 0b00010110010101100110011110110001 */

static U32 XXH32_round(U32 acc, U32 input)
{
    acc += input * PRIME32_2;
    acc  = XXH_rotl32(acc, 13);
    acc *= PRIME32_1;
#if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE)
    /* UGLY HACK:
     * This inline assembly hack forces acc into a normal register. This is the
     * only thing that prevents GCC and Clang from autovectorizing the XXH32 loop
     * (pragmas and attributes don't work for some reason) without globally
     * disabling SSE4.1.
     *
     * The reason we want to avoid vectorization is that, despite working on
     * 4 integers at a time, there are multiple factors slowing XXH32 down on
     * SSE4:
     * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on newer chips!)
     *   making it slightly slower to multiply four integers at once compared to four
     *   integers independently. Even when pmulld was fastest, on Sandy/Ivy Bridge, it is
     *   still not worth it to go into SSE just to multiply unless doing a long operation.
     *
     * - Four instructions are required to rotate,
     *      movdqa tmp, v     // not required with VEX encoding
     *      pslld  tmp, 13    // tmp <<= 13
     *      psrld  v,   19    // x >>= 19
     *      por    v,   tmp   // x |= tmp
     *   compared to one for scalar:
     *      roll   v, 13      // reliably fast across the board
     *      shldl  v, v, 13   // Sandy Bridge and later prefer this for some reason
     *
     * - Instruction-level parallelism is actually more beneficial here because
     *   SIMD serializes this operation: in the scalar version, while v1 is rotating,
     *   v2 can load data and v3 can multiply, whereas SSE forces them to operate together.
     *
     * How this hack works:
     * __asm__(""       // Declare an assembly block but don't declare any instructions
     *      :           // However, as an Input/Output Operand,
     *      "+r"        // constrain a read/write operand (+) as a general purpose register (r).
     *      (acc)       // and set acc as the operand
     * );
     *
     * Because of the 'r', the compiler has promised that acc will be in a
     * general purpose register and the '+' says that it will be 'read/write',
     * so it has to assume it has changed. It is like volatile without all the
     * loads and stores.
     *
     * Since the argument has to be in a normal register (not an SSE register),
     * each time XXH32_round is called, it is impossible to vectorize. */
    __asm__("" : "+r" (acc));
#endif
    return acc;
}

/* mix all bits */
static U32 XXH32_avalanche(U32 h32)
{
    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;
    return(h32);
}

#define XXH_get32bits(p) XXH_readLE32_align(p, align)

static U32
XXH32_finalize(U32 h32, const void* ptr, size_t len, XXH_alignment align)
{
    const BYTE* p = (const BYTE*)ptr;

#define PROCESS1               \
    h32 += (*p++) * PRIME32_5; \
    h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;

#define PROCESS4                         \
    h32 += XXH_get32bits(p) * PRIME32_3; \
    p+=4;                                \
    h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;

    switch(len&15)  /* or switch(bEnd - p) */
    {
      case 12:      PROCESS4;
                    /* fallthrough */
      case 8:       PROCESS4;
                    /* fallthrough */
      case 4:       PROCESS4;
                    return XXH32_avalanche(h32);

      case 13:      PROCESS4;
                    /* fallthrough */
      case 9:       PROCESS4;
                    /* fallthrough */
      case 5:       PROCESS4;
                    PROCESS1;
                    return XXH32_avalanche(h32);

      case 14:      PROCESS4;
                    /* fallthrough */
      case 10:      PROCESS4;
                    /* fallthrough */
      case 6:       PROCESS4;
                    PROCESS1;
                    PROCESS1;
                    return XXH32_avalanche(h32);

      case 15:      PROCESS4;
                    /* fallthrough */
      case 11:      PROCESS4;
                    /* fallthrough */
      case 7:       PROCESS4;
                    /* fallthrough */
      case 3:       PROCESS1;
                    /* fallthrough */
      case 2:       PROCESS1;
                    /* fallthrough */
      case 1:       PROCESS1;
                    /* fallthrough */
      case 0:       return XXH32_avalanche(h32);
    }
    assert(0);
    return h32;   /* reaching this point is deemed impossible */
}

XXH_FORCE_INLINE U32
XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_alignment align)
{
    const BYTE* p = (const BYTE*)input;
    const BYTE* bEnd = p + len;
    U32 h32;

#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
    if (p==NULL) {
        len=0;
        bEnd=p=(const BYTE*)(size_t)16;
    }
#endif

    if (len>=16) {
        const BYTE* const limit = bEnd - 15;
        U32 v1 = seed + PRIME32_1 + PRIME32_2;
        U32 v2 = seed + PRIME32_2;
        U32 v3 = seed + 0;
        U32 v4 = seed - PRIME32_1;

        do {
            v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
            v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
            v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
            v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
        } while (p < limit);

        h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)
            + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
    } else {
        h32  = seed + PRIME32_5;
    }

    h32 += (U32)len;

    return XXH32_finalize(h32, p, len&15, align);
}


XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
{
#if 0
    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
    XXH32_state_t state;
    XXH32_reset(&state, seed);
    XXH32_update(&state, input, len);
    return XXH32_digest(&state);
#else
    if (XXH_FORCE_ALIGN_CHECK) {
        if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */
            return XXH32_endian_align(input, len, seed, XXH_aligned);
    }   }

    return XXH32_endian_align(input, len, seed, XXH_unaligned);
#endif
}


/*======   Hash streaming   ======*/

XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
{
    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
}
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
{
    XXH_free(statePtr);
    return XXH_OK;
}

XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
{
    memcpy(dstState, srcState, sizeof(*dstState));
}

XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
{
    XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
    memset(&state, 0, sizeof(state));
    state.v1 = seed + PRIME32_1 + PRIME32_2;
    state.v2 = seed + PRIME32_2;
    state.v3 = seed + 0;
    state.v4 = seed - PRIME32_1;
    /* do not write into reserved, planned to be removed in a future version */
    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
    return XXH_OK;
}


XXH_PUBLIC_API XXH_errorcode
XXH32_update(XXH32_state_t* state, const void* input, size_t len)
{
    if (input==NULL)
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
        return XXH_OK;
#else
        return XXH_ERROR;
#endif

    {   const BYTE* p = (const BYTE*)input;
        const BYTE* const bEnd = p + len;

        state->total_len_32 += (XXH32_hash_t)len;
        state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));

        if (state->memsize + len < 16)  {   /* fill in tmp buffer */
            XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
            state->memsize += (XXH32_hash_t)len;
            return XXH_OK;
        }

        if (state->memsize) {   /* some data left from previous update */
            XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
            {   const U32* p32 = state->mem32;
                state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++;
                state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++;
                state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++;
                state->v4 = XXH32_round(state->v4, XXH_readLE32(p32));
            }
            p += 16-state->memsize;
            state->memsize = 0;
        }

        if (p <= bEnd-16) {
            const BYTE* const limit = bEnd - 16;
            U32 v1 = state->v1;
            U32 v2 = state->v2;
            U32 v3 = state->v3;
            U32 v4 = state->v4;

            do {
                v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4;
                v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4;
                v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4;
                v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4;
            } while (p<=limit);

            state->v1 = v1;
            state->v2 = v2;
            state->v3 = v3;
            state->v4 = v4;
        }

        if (p < bEnd) {
            XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
            state->memsize = (unsigned)(bEnd-p);
        }
    }

    return XXH_OK;
}


XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state)
{
    U32 h32;

    if (state->large_len) {
        h32 = XXH_rotl32(state->v1, 1)
            + XXH_rotl32(state->v2, 7)
            + XXH_rotl32(state->v3, 12)
            + XXH_rotl32(state->v4, 18);
    } else {
        h32 = state->v3 /* == seed */ + PRIME32_5;
    }

    h32 += state->total_len_32;

    return XXH32_finalize(h32, state->mem32, state->memsize, XXH_aligned);
}
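
/* Usage sketch (illustrative only, excluded from the build with #if 0): the same
 * 32-bit hash can be obtained either in one shot with XXH32(), or incrementally
 * with the streaming API above when the input arrives in pieces. The chunk size
 * of 64 bytes and the omitted error handling are arbitrary choices for brevity. */
#if 0
static unsigned hash_in_one_call(const void* data, size_t size, unsigned seed)
{
    return XXH32(data, size, seed);
}

static unsigned hash_in_chunks(const void* data, size_t size, unsigned seed)
{
    XXH32_state_t* const state = XXH32_createState();   /* error handling omitted */
    const BYTE* p = (const BYTE*)data;
    size_t remaining = size;
    unsigned h32;

    XXH32_reset(state, seed);
    while (remaining) {   /* feed arbitrary-sized chunks; partial blocks are buffered in state->mem32 */
        size_t const chunk = remaining > 64 ? 64 : remaining;
        XXH32_update(state, p, chunk);
        p += chunk;
        remaining -= chunk;
    }
    h32 = XXH32_digest(state);   /* hash of everything fed so far */
    XXH32_freeState(state);
    return h32;
}
#endif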


/*======   Canonical representation   ======*/

/*! Default XXH result types are basic unsigned 32 and 64 bit values.
 *  The canonical representation follows the human-readable write convention, aka big-endian (large digits first).
 *  These functions allow transformation of a hash result into and from its canonical format.
 *  This way, hash values can be written into a file or buffer, remaining comparable across different systems.
 */

XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
{
    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
    memcpy(dst, &hash, sizeof(*dst));
}

XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
{
    return XXH_readBE32(src);
}
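
/* Usage sketch (illustrative only, excluded from the build with #if 0): round-trip
 * through the canonical (big-endian) representation, which is the form intended
 * for storage or transmission between machines of different endianness. */
#if 0
static void store_and_reload_xxh32(XXH32_hash_t h32)
{
    XXH32_canonical_t canonical;
    XXH32_hash_t reloaded;

    XXH32_canonicalFromHash(&canonical, h32);   /* endian-independent byte layout */
    /* the bytes of `canonical` could now be written to a file or network message */
    reloaded = XXH32_hashFromCanonical(&canonical);
    assert(reloaded == h32);
}
#endif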


#ifndef XXH_NO_LONG_LONG

/* *******************************************************************
*  64-bit hash functions
*********************************************************************/

/*======   Memory access   ======*/

#ifndef MEM_MODULE
# define MEM_MODULE
# if !defined (__VMS) \
   && (defined (__cplusplus) \
   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
#   include <stdint.h>
    typedef uint64_t U64;
# else
    /* if the compiler doesn't support unsigned long long, replace it with another 64-bit type */
    typedef unsigned long long U64;
# endif
#endif


#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))

/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware. */
static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }

#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64;
static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }

#else

/* portable and safe solution. Generally efficient.
 * see : http://stackoverflow.com/a/32095106/646947
 */
static U64 XXH_read64(const void* memPtr)
{
    U64 val;
    memcpy(&val, memPtr, sizeof(val));
    return val;
}

#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */

#if defined(_MSC_VER)     /* Visual Studio */
#  define XXH_swap64 _byteswap_uint64
#elif XXH_GCC_VERSION >= 403
#  define XXH_swap64 __builtin_bswap64
#else
static U64 XXH_swap64 (U64 x)
{
    return  ((x << 56) & 0xff00000000000000ULL) |
            ((x << 40) & 0x00ff000000000000ULL) |
            ((x << 24) & 0x0000ff0000000000ULL) |
            ((x << 8)  & 0x000000ff00000000ULL) |
            ((x >> 8)  & 0x00000000ff000000ULL) |
            ((x >> 24) & 0x0000000000ff0000ULL) |
            ((x >> 40) & 0x000000000000ff00ULL) |
            ((x >> 56) & 0x00000000000000ffULL);
}
#endif

XXH_FORCE_INLINE U64 XXH_readLE64(const void* ptr)
{
    return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
}

static U64 XXH_readBE64(const void* ptr)
{
    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
}

XXH_FORCE_INLINE U64
XXH_readLE64_align(const void* ptr, XXH_alignment align)
{
    if (align==XXH_unaligned)
        return XXH_readLE64(ptr);
    else
        return XXH_CPU_LITTLE_ENDIAN ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
}


/*======   xxh64   ======*/

static const U64 PRIME64_1 = 11400714785074694791ULL;   /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
static const U64 PRIME64_2 = 14029467366897019727ULL;   /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
static const U64 PRIME64_3 =  1609587929392839161ULL;   /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
static const U64 PRIME64_4 =  9650029242287828579ULL;   /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
static const U64 PRIME64_5 =  2870177450012600261ULL;   /* 0b0010011111010100111010110010111100010110010101100110011111000101 */

static U64 XXH64_round(U64 acc, U64 input)
{
    acc += input * PRIME64_2;
    acc  = XXH_rotl64(acc, 31);
    acc *= PRIME64_1;
    return acc;
}

static U64 XXH64_mergeRound(U64 acc, U64 val)
{
    val  = XXH64_round(0, val);
    acc ^= val;
    acc  = acc * PRIME64_1 + PRIME64_4;
    return acc;
}

static U64 XXH64_avalanche(U64 h64)
{
    h64 ^= h64 >> 33;
    h64 *= PRIME64_2;
    h64 ^= h64 >> 29;
    h64 *= PRIME64_3;
    h64 ^= h64 >> 32;
    return h64;
}


#define XXH_get64bits(p) XXH_readLE64_align(p, align)

static U64
XXH64_finalize(U64 h64, const void* ptr, size_t len, XXH_alignment align)
{
    const BYTE* p = (const BYTE*)ptr;

#define PROCESS1_64            \
    h64 ^= (*p++) * PRIME64_5; \
    h64 = XXH_rotl64(h64, 11) * PRIME64_1;

#define PROCESS4_64                                 \
    h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;     \
    p+=4;                                           \
    h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;

#define PROCESS8_64 {                                    \
    U64 const k1 = XXH64_round(0, XXH_get64bits(p));     \
    p+=8;                                                \
    h64 ^= k1;                                           \
    h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;   \
}

    switch(len&31) {
      case 24: PROCESS8_64;
                    /* fallthrough */
      case 16: PROCESS8_64;
                    /* fallthrough */
      case  8: PROCESS8_64;
               return XXH64_avalanche(h64);

      case 28: PROCESS8_64;
                    /* fallthrough */
      case 20: PROCESS8_64;
                    /* fallthrough */
      case 12: PROCESS8_64;
                    /* fallthrough */
      case  4: PROCESS4_64;
               return XXH64_avalanche(h64);

      case 25: PROCESS8_64;
                    /* fallthrough */
      case 17: PROCESS8_64;
                    /* fallthrough */
      case  9: PROCESS8_64;
               PROCESS1_64;
               return XXH64_avalanche(h64);

      case 29: PROCESS8_64;
                    /* fallthrough */
      case 21: PROCESS8_64;
                    /* fallthrough */
      case 13: PROCESS8_64;
                    /* fallthrough */
      case  5: PROCESS4_64;
               PROCESS1_64;
               return XXH64_avalanche(h64);

      case 26: PROCESS8_64;
                    /* fallthrough */
      case 18: PROCESS8_64;
                    /* fallthrough */
      case 10: PROCESS8_64;
               PROCESS1_64;
               PROCESS1_64;
               return XXH64_avalanche(h64);

      case 30: PROCESS8_64;
                    /* fallthrough */
      case 22: PROCESS8_64;
                    /* fallthrough */
      case 14: PROCESS8_64;
                    /* fallthrough */
      case  6: PROCESS4_64;
               PROCESS1_64;
               PROCESS1_64;
               return XXH64_avalanche(h64);

      case 27: PROCESS8_64;
                    /* fallthrough */
      case 19: PROCESS8_64;
                    /* fallthrough */
      case 11: PROCESS8_64;
               PROCESS1_64;
               PROCESS1_64;
               PROCESS1_64;
               return XXH64_avalanche(h64);

      case 31: PROCESS8_64;
                    /* fallthrough */
      case 23: PROCESS8_64;
                    /* fallthrough */
      case 15: PROCESS8_64;
                    /* fallthrough */
      case  7: PROCESS4_64;
                    /* fallthrough */
      case  3: PROCESS1_64;
                    /* fallthrough */
      case  2: PROCESS1_64;
                    /* fallthrough */
      case  1: PROCESS1_64;
                    /* fallthrough */
      case  0: return XXH64_avalanche(h64);
    }

    /* impossible to reach */
    assert(0);
    return 0;   /* unreachable, but some compilers complain without it */
}

XXH_FORCE_INLINE U64
XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_alignment align)
{
    const BYTE* p = (const BYTE*)input;
    const BYTE* bEnd = p + len;
    U64 h64;

#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
    if (p==NULL) {
        len=0;
        bEnd=p=(const BYTE*)(size_t)32;
    }
#endif

    if (len>=32) {
        const BYTE* const limit = bEnd - 32;
        U64 v1 = seed + PRIME64_1 + PRIME64_2;
        U64 v2 = seed + PRIME64_2;
        U64 v3 = seed + 0;
        U64 v4 = seed - PRIME64_1;

        do {
            v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
            v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
            v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
            v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
        } while (p<=limit);

        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
        h64 = XXH64_mergeRound(h64, v1);
        h64 = XXH64_mergeRound(h64, v2);
        h64 = XXH64_mergeRound(h64, v3);
        h64 = XXH64_mergeRound(h64, v4);

    } else {
        h64  = seed + PRIME64_5;
    }

    h64 += (U64) len;

    return XXH64_finalize(h64, p, len, align);
}


XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
{
#if 0
    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
    XXH64_state_t state;
    XXH64_reset(&state, seed);
    XXH64_update(&state, input, len);
    return XXH64_digest(&state);
#else
    if (XXH_FORCE_ALIGN_CHECK) {
        if ((((size_t)input) & 7)==0) {   /* Input is aligned, let's leverage the speed advantage */
            return XXH64_endian_align(input, len, seed, XXH_aligned);
    }   }

    return XXH64_endian_align(input, len, seed, XXH_unaligned);
#endif
}

/*======   Hash Streaming   ======*/

XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
{
    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
}
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
{
    XXH_free(statePtr);
    return XXH_OK;
}

XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
{
    memcpy(dstState, srcState, sizeof(*dstState));
}

XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
{
    XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
    memset(&state, 0, sizeof(state));
    state.v1 = seed + PRIME64_1 + PRIME64_2;
    state.v2 = seed + PRIME64_2;
    state.v3 = seed + 0;
    state.v4 = seed - PRIME64_1;
    /* do not write into reserved, planned to be removed in a future version */
    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
    return XXH_OK;
}

XXH_PUBLIC_API XXH_errorcode
XXH64_update (XXH64_state_t* state, const void* input, size_t len)
{
    if (input==NULL)
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
        return XXH_OK;
#else
        return XXH_ERROR;
#endif

    {   const BYTE* p = (const BYTE*)input;
        const BYTE* const bEnd = p + len;

        state->total_len += len;

        if (state->memsize + len < 32) {   /* fill in tmp buffer */
            XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
            state->memsize += (U32)len;
            return XXH_OK;
        }

        if (state->memsize) {   /* tmp buffer is full */
            XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
            state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
            state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
            state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
            state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3));
            p += 32-state->memsize;
            state->memsize = 0;
        }

        if (p+32 <= bEnd) {
            const BYTE* const limit = bEnd - 32;
            U64 v1 = state->v1;
            U64 v2 = state->v2;
            U64 v3 = state->v3;
            U64 v4 = state->v4;

            do {
                v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
                v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
                v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
                v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
            } while (p<=limit);

            state->v1 = v1;
            state->v2 = v2;
            state->v3 = v3;
            state->v4 = v4;
        }

        if (p < bEnd) {
            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
            state->memsize = (unsigned)(bEnd-p);
        }
    }

    return XXH_OK;
}

XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state)
{
    U64 h64;

    if (state->total_len >= 32) {
        U64 const v1 = state->v1;
        U64 const v2 = state->v2;
        U64 const v3 = state->v3;
        U64 const v4 = state->v4;

        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
        h64 = XXH64_mergeRound(h64, v1);
        h64 = XXH64_mergeRound(h64, v2);
        h64 = XXH64_mergeRound(h64, v3);
        h64 = XXH64_mergeRound(h64, v4);
    } else {
        h64  = state->v3 /*seed*/ + PRIME64_5;
    }

    h64 += (U64) state->total_len;

    return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, XXH_aligned);
}


/*======   Canonical representation   ======*/

XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
{
    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
    memcpy(dst, &hash, sizeof(*dst));
}

XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
{
    return XXH_readBE64(src);
}


/* *********************************************************************
*  XXH3
*  New generation hash designed for speed on small keys and vectorization
************************************************************************ */

#include "xxh3.h"

#endif  /* XXH_NO_LONG_LONG */