aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/eina/src/tests/city.cc
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/eina/src/tests/city.cc')
-rw-r--r--libraries/eina/src/tests/city.cc307
1 files changed, 307 insertions, 0 deletions
diff --git a/libraries/eina/src/tests/city.cc b/libraries/eina/src/tests/city.cc
new file mode 100644
index 0000000..36ff93b
--- /dev/null
+++ b/libraries/eina/src/tests/city.cc
@@ -0,0 +1,307 @@
1// Copyright (c) 2011 Google, Inc.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20//
21// CityHash Version 1, by Geoff Pike and Jyrki Alakuijala
22//
23// This file provides CityHash64() and related functions.
24//
25// It's probably possible to create even faster hash functions by
26// writing a program that systematically explores some of the space of
27// possible hash functions, by using SIMD instructions, or by
28// compromising on hash quality.
29
30#include "city.h"
31
32#include <algorithm>
33
34using namespace std;
35
36#define UNALIGNED_LOAD64(p) (*(const uint64*)(p))
37#define UNALIGNED_LOAD32(p) (*(const uint32*)(p))
38
39#if !defined(LIKELY)
40#if defined(__GNUC__)
41#define LIKELY(x) (__builtin_expect(!!(x), 1))
42#else
43#define LIKELY(x) (x)
44#endif
45#endif
46
47// Some primes between 2^63 and 2^64 for various uses.
48static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
49static const uint64 k1 = 0xb492b66fbe98f273ULL;
50static const uint64 k2 = 0x9ae16a3b2f90404fULL;
51static const uint64 k3 = 0xc949d7c7509e6557ULL;
52
53// Bitwise right rotate. Normally this will compile to a single
54// instruction, especially if the shift is a manifest constant.
55static uint64 Rotate(uint64 val, int shift) {
56 // Avoid shifting by 64: doing so yields an undefined result.
57 return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
58}
59
60// Equivalent to Rotate(), but requires the second arg to be non-zero.
61// On x86-64, and probably others, it's possible for this to compile
62// to a single instruction if both args are already in registers.
63static uint64 RotateByAtLeast1(uint64 val, int shift) {
64 return (val >> shift) | (val << (64 - shift));
65}
66
67static uint64 ShiftMix(uint64 val) {
68 return val ^ (val >> 47);
69}
70
71static uint64 HashLen16(uint64 u, uint64 v) {
72 return Hash128to64(uint128(u, v));
73}
74
75static uint64 HashLen0to16(const char *s, size_t len) {
76 if (len > 8) {
77 uint64 a = UNALIGNED_LOAD64(s);
78 uint64 b = UNALIGNED_LOAD64(s + len - 8);
79 return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
80 }
81 if (len >= 4) {
82 uint64 a = UNALIGNED_LOAD32(s);
83 return HashLen16(len + (a << 3), UNALIGNED_LOAD32(s + len - 4));
84 }
85 if (len > 0) {
86 uint8 a = s[0];
87 uint8 b = s[len >> 1];
88 uint8 c = s[len - 1];
89 uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
90 uint32 z = len + (static_cast<uint32>(c) << 2);
91 return ShiftMix(y * k2 ^ z * k3) * k2;
92 }
93 return k2;
94}
95
96// This probably works well for 16-byte strings as well, but it may be overkill
97// in that case.
98static uint64 HashLen17to32(const char *s, size_t len) {
99 uint64 a = UNALIGNED_LOAD64(s) * k1;
100 uint64 b = UNALIGNED_LOAD64(s + 8);
101 uint64 c = UNALIGNED_LOAD64(s + len - 8) * k2;
102 uint64 d = UNALIGNED_LOAD64(s + len - 16) * k0;
103 return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
104 a + Rotate(b ^ k3, 20) - c + len);
105}
106
107// Return a 16-byte hash for 48 bytes. Quick and dirty.
108// Callers do best to use "random-looking" values for a and b.
109static pair<uint64, uint64> WeakHashLen32WithSeeds(
110 uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
111 a += w;
112 b = Rotate(b + a + z, 21);
113 uint64 c = a;
114 a += x;
115 a += y;
116 b += Rotate(a, 44);
117 return make_pair(a + z, b + c);
118}
119
120// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
121static pair<uint64, uint64> WeakHashLen32WithSeeds(
122 const char* s, uint64 a, uint64 b) {
123 return WeakHashLen32WithSeeds(UNALIGNED_LOAD64(s),
124 UNALIGNED_LOAD64(s + 8),
125 UNALIGNED_LOAD64(s + 16),
126 UNALIGNED_LOAD64(s + 24),
127 a,
128 b);
129}
130
131// Return an 8-byte hash for 33 to 64 bytes.
132static uint64 HashLen33to64(const char *s, size_t len) {
133 uint64 z = UNALIGNED_LOAD64(s + 24);
134 uint64 a = UNALIGNED_LOAD64(s) + (len + UNALIGNED_LOAD64(s + len - 16)) * k0;
135 uint64 b = Rotate(a + z, 52);
136 uint64 c = Rotate(a, 37);
137 a += UNALIGNED_LOAD64(s + 8);
138 c += Rotate(a, 7);
139 a += UNALIGNED_LOAD64(s + 16);
140 uint64 vf = a + z;
141 uint64 vs = b + Rotate(a, 31) + c;
142 a = UNALIGNED_LOAD64(s + 16) + UNALIGNED_LOAD64(s + len - 32);
143 z = UNALIGNED_LOAD64(s + len - 8);
144 b = Rotate(a + z, 52);
145 c = Rotate(a, 37);
146 a += UNALIGNED_LOAD64(s + len - 24);
147 c += Rotate(a, 7);
148 a += UNALIGNED_LOAD64(s + len - 16);
149 uint64 wf = a + z;
150 uint64 ws = b + Rotate(a, 31) + c;
151 uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
152 return ShiftMix(r * k0 + vs) * k2;
153}
154
155uint64 CityHash64(const char *s, size_t len) {
156 if (len <= 32) {
157 if (len <= 16) {
158 return HashLen0to16(s, len);
159 } else {
160 return HashLen17to32(s, len);
161 }
162 } else if (len <= 64) {
163 return HashLen33to64(s, len);
164 }
165
166 // For strings over 64 bytes we hash the end first, and then as we
167 // loop we keep 56 bytes of state: v, w, x, y, and z.
168 uint64 x = UNALIGNED_LOAD64(s);
169 uint64 y = UNALIGNED_LOAD64(s + len - 16) ^ k1;
170 uint64 z = UNALIGNED_LOAD64(s + len - 56) ^ k0;
171 pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, y);
172 pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, len * k1, k0);
173 z += ShiftMix(v.second) * k1;
174 x = Rotate(z + x, 39) * k1;
175 y = Rotate(y, 33) * k1;
176
177 // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
178 len = (len - 1) & ~static_cast<size_t>(63);
179 do {
180 x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
181 y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
182 x ^= w.second;
183 y ^= v.first;
184 z = Rotate(z ^ w.first, 33);
185 v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
186 w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
187 std::swap(z, x);
188 s += 64;
189 len -= 64;
190 } while (len != 0);
191 return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
192 HashLen16(v.second, w.second) + x);
193}
194
195uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
196 return CityHash64WithSeeds(s, len, k2, seed);
197}
198
199uint64 CityHash64WithSeeds(const char *s, size_t len,
200 uint64 seed0, uint64 seed1) {
201 return HashLen16(CityHash64(s, len) - seed0, seed1);
202}
203
204// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
205// of any length representable in ssize_t. Based on City and Murmur.
206static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
207 uint64 a = Uint128Low64(seed);
208 uint64 b = Uint128High64(seed);
209 uint64 c = 0;
210 uint64 d = 0;
211 ssize_t l = len - 16;
212 if (l <= 0) { // len <= 16
213 c = b * k1 + HashLen0to16(s, len);
214 d = Rotate(a + (len >= 8 ? UNALIGNED_LOAD64(s) : c), 32);
215 } else { // len > 16
216 c = HashLen16(UNALIGNED_LOAD64(s + len - 8) + k1, a);
217 d = HashLen16(b + len, c + UNALIGNED_LOAD64(s + len - 16));
218 a += d;
219 do {
220 a ^= ShiftMix(UNALIGNED_LOAD64(s) * k1) * k1;
221 a *= k1;
222 b ^= a;
223 c ^= ShiftMix(UNALIGNED_LOAD64(s + 8) * k1) * k1;
224 c *= k1;
225 d ^= c;
226 s += 16;
227 l -= 16;
228 } while (l > 0);
229 }
230 a = HashLen16(a, c);
231 b = HashLen16(d, b);
232 return uint128(a ^ b, HashLen16(b, a));
233}
234
235uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
236 if (len < 128) {
237 return CityMurmur(s, len, seed);
238 }
239
240 // We expect len >= 128 to be the common case. Keep 56 bytes of state:
241 // v, w, x, y, and z.
242 pair<uint64, uint64> v, w;
243 uint64 x = Uint128Low64(seed);
244 uint64 y = Uint128High64(seed);
245 uint64 z = len * k1;
246 v.first = Rotate(y ^ k1, 49) * k1 + UNALIGNED_LOAD64(s);
247 v.second = Rotate(v.first, 42) * k1 + UNALIGNED_LOAD64(s + 8);
248 w.first = Rotate(y + z, 35) * k1 + x;
249 w.second = Rotate(x + UNALIGNED_LOAD64(s + 88), 53) * k1;
250
251 // This is the same inner loop as CityHash64(), manually unrolled.
252 do {
253 x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
254 y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
255 x ^= w.second;
256 y ^= v.first;
257 z = Rotate(z ^ w.first, 33);
258 v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
259 w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
260 std::swap(z, x);
261 s += 64;
262 x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
263 y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
264 x ^= w.second;
265 y ^= v.first;
266 z = Rotate(z ^ w.first, 33);
267 v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
268 w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
269 std::swap(z, x);
270 s += 64;
271 len -= 128;
272 } while (LIKELY(len >= 128));
273 y += Rotate(w.first, 37) * k0 + z;
274 x += Rotate(v.first + z, 49) * k0;
275 // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
276 for (size_t tail_done = 0; tail_done < len; ) {
277 tail_done += 32;
278 y = Rotate(y - x, 42) * k0 + v.second;
279 w.first += UNALIGNED_LOAD64(s + len - tail_done + 16);
280 x = Rotate(x, 49) * k0 + w.first;
281 w.first += v.first;
282 v = WeakHashLen32WithSeeds(s + len - tail_done, v.first, v.second);
283 }
284 // At this point our 48 bytes of state should contain more than
285 // enough information for a strong 128-bit hash. We use two
286 // different 48-byte-to-8-byte hashes to get a 16-byte final result.
287 x = HashLen16(x, v.first);
288 y = HashLen16(y, w.first);
289 return uint128(HashLen16(x + v.second, w.second) + y,
290 HashLen16(x + w.second, y + v.second));
291}
292
293uint128 CityHash128(const char *s, size_t len) {
294 if (len >= 16) {
295 return CityHash128WithSeed(s + 16,
296 len - 16,
297 uint128(UNALIGNED_LOAD64(s) ^ k3,
298 UNALIGNED_LOAD64(s + 8)));
299 } else if (len >= 8) {
300 return CityHash128WithSeed(NULL,
301 0,
302 uint128(UNALIGNED_LOAD64(s) ^ (len * k0),
303 UNALIGNED_LOAD64(s + len - 8) ^ k1));
304 } else {
305 return CityHash128WithSeed(s, len, uint128(k0, k1));
306 }
307}