Last active
May 2, 2020 22:15
-
-
Save vchirikov/14ce3c3ed5b1e8938bd7d30106ec6e84 to your computer and use it in GitHub Desktop.
MurmurHash3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public interface IHashFunction | |
{ | |
/// <summary> | |
/// Computes the 32-bit hash for the <seealso cref="ReadOnlySpan{T}"/> | |
/// </summary> | |
byte[] ComputeHash128(in ReadOnlySpan<byte> buffer); | |
/// <summary> | |
/// Computes the 128-bit hash for the <seealso cref="ReadOnlySpan{T}"/> | |
/// </summary> | |
uint ComputeHash32(in ReadOnlySpan<byte> buffer); | |
} | |
/// <summary> | |
/// <see href="https://github.com/aappleby/smhasher/wiki/MurmurHash3">Docs</see> | |
/// and <see href="https://github.com/aappleby/smhasher/blob/92cf3702fcfaadc84eb7bef59825a23e0cd84f56/src/MurmurHash3.cpp#L255">MurmurHash3.cpp</see> | |
/// You can do this online on <seealso href="http://murmurhash.shorelabs.com/"/> | |
/// Contains x64-optimized versions only. | |
/// </summary> | |
public class MurmurHash3 : IHashFunction | |
{ | |
/// <inheritdoc/> | |
public unsafe uint ComputeHash32(in ReadOnlySpan<byte> buffer) | |
{ | |
const uint seed = 0; | |
var len = buffer.Length; | |
int nblocks = len / 16; | |
uint h1 = seed; | |
const uint c1 = 0xcc9e2d51; | |
const uint c2 = 0x1b873593; | |
// body | |
fixed (byte* pbuffer = buffer) | |
{ | |
byte* pinput = pbuffer; | |
uint* body = (uint*)pinput; | |
uint k1; | |
for (int i = -nblocks; i > 0; i++) | |
{ | |
k1 = body[i]; | |
k1 *= c1; | |
k1 = (k1 << 15) | (k1 >> (32 - 15)); // ROTL32(k1, 15) | |
k1 *= c2; | |
h1 ^= k1; | |
h1 = (h1 << 13) | (h1 >> (32 - 13)); // ROTL32(h1, 13) | |
h1 = (h1 * 5) + 0xe6546b64; | |
} | |
// tail | |
byte* tail = pinput + (nblocks * 4); | |
k1 = 0; | |
switch (len & 3) | |
{ | |
case 3: | |
k1 ^= (uint)tail[2] << 16; | |
goto case 2; | |
case 2: | |
k1 ^= (uint)tail[1] << 8; | |
goto case 1; | |
case 1: | |
k1 ^= (uint)tail[0]; | |
k1 *= c1; | |
k1 = (k1 << 15) | (k1 >> (32 - 15)); // ROTL32(k1, 15) | |
k1 *= c2; | |
h1 ^= k1; | |
break; | |
}; | |
} | |
// finalization | |
h1 ^= (uint)len; | |
h1 = FMix32(h1); | |
return h1; | |
} | |
/// <inheritdoc/> | |
public unsafe byte[] ComputeHash128(in ReadOnlySpan<byte> buffer) | |
{ | |
const ulong c1 = 0x87c37b91_114253d5; | |
const ulong c2 = 0x4cf5ad43_2745937f; | |
const ulong seed = 0; | |
var len = buffer.Length; | |
int nblocks = len / 16; | |
ulong h1 = seed; | |
ulong h2 = seed; | |
// body | |
fixed (byte* pbuffer = buffer) | |
{ | |
byte* pinput = pbuffer; | |
ulong* body = (ulong*)pinput; | |
ulong k1; | |
ulong k2; | |
for (int i = 0; i < nblocks; i++) | |
{ | |
k1 = body[i * 2]; | |
k2 = body[(i * 2) + 1]; | |
k1 *= c1; | |
k1 = (k1 << 31) | (k1 >> (64 - 31)); // ROTL64(k1, 31); | |
k1 *= c2; | |
h1 ^= k1; | |
h1 = (h1 << 27) | (h1 >> (64 - 27)); // ROTL64(h1, 27); | |
h1 += h2; | |
h1 = (h1 * 5) + 0x52dce729; | |
k2 *= c2; | |
k2 = (k2 << 33) | (k2 >> (64 - 33)); // ROTL64(k2, 33); | |
k2 *= c1; | |
h2 ^= k2; | |
h2 = (h2 << 31) | (h2 >> (64 - 31)); // ROTL64(h2, 31); | |
h2 += h1; | |
h2 = (h2 * 5) + 0x38495ab5; | |
} | |
// tail | |
k1 = 0; | |
k2 = 0; | |
byte* tail = pinput + (nblocks * 16); | |
switch (len & 15) | |
{ | |
case 15: | |
k2 ^= (ulong)tail[14] << 48; | |
goto case 14; | |
case 14: | |
k2 ^= (ulong)tail[13] << 40; | |
goto case 13; | |
case 13: | |
k2 ^= (ulong)tail[12] << 32; | |
goto case 12; | |
case 12: | |
k2 ^= (ulong)tail[11] << 24; | |
goto case 11; | |
case 11: | |
k2 ^= (ulong)tail[10] << 16; | |
goto case 10; | |
case 10: | |
k2 ^= (ulong)tail[9] << 8; | |
goto case 9; | |
case 9: | |
k2 ^= tail[8]; | |
k2 *= c2; | |
k2 = (k2 << 33) | (k2 >> (64 - 33)); // ROTL64(k2, 33); | |
k2 *= c1; | |
h2 ^= k2; | |
goto case 8; | |
case 8: | |
k1 ^= (ulong)tail[7] << 56; | |
goto case 7; | |
case 7: | |
k1 ^= (ulong)tail[6] << 48; | |
goto case 6; | |
case 6: | |
k1 ^= (ulong)tail[5] << 40; | |
goto case 5; | |
case 5: | |
k1 ^= (ulong)tail[4] << 32; | |
goto case 4; | |
case 4: | |
k1 ^= (ulong)tail[3] << 24; | |
goto case 3; | |
case 3: | |
k1 ^= (ulong)tail[2] << 16; | |
goto case 2; | |
case 2: | |
k1 ^= (ulong)tail[1] << 8; | |
goto case 1; | |
case 1: | |
k1 ^= tail[0]; | |
k1 *= c1; | |
k1 = (k1 << 31) | (k1 >> (64 - 31)); // ROTL64(k1, 31); | |
k1 *= c2; | |
h1 ^= k1; | |
break; | |
} | |
} | |
// finalization | |
h1 ^= (ulong)len; | |
h2 ^= (ulong)len; | |
h1 += h2; | |
h2 += h1; | |
h1 = FMix64(h1); | |
h2 = FMix64(h2); | |
h1 += h2; | |
h2 += h1; | |
var ret = new byte[16]; | |
fixed (byte* pret = ret) | |
{ | |
var ulpret = (ulong*)pret; | |
ulpret[0] = Reverse(h1); | |
ulpret[1] = Reverse(h2); | |
} | |
return ret; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static ulong FMix64(ulong k) | |
{ | |
k ^= k >> 33; | |
k *= 0xff51afd7ed558ccd; | |
k ^= k >> 33; | |
k *= 0xc4ceb9fe1a85ec53; | |
k ^= k >> 33; | |
return k; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static uint FMix32(uint h) | |
{ | |
h ^= h >> 16; | |
h *= 0x85ebca6b; | |
h ^= h >> 13; | |
h *= 0xc2b2ae35; | |
h ^= h >> 16; | |
return h; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static ulong Reverse(ulong value) => | |
((value & 0x00000000000000FFUL) << 56) | ((value & 0x000000000000FF00UL) << 40) | | |
((value & 0x0000000000FF0000UL) << 24) | ((value & 0x00000000FF000000UL) << 08) | | |
((value & 0x000000FF00000000UL) >> 08) | ((value & 0x0000FF0000000000UL) >> 24) | | |
((value & 0x00FF000000000000UL) >> 40) | ((value & 0xFF00000000000000UL) >> 56); | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static ulong Reverse(uint value) => | |
((value & 0x000000FFU) << 24) | ((value & 0x0000FF00U) << 08) | | |
((value & 0x00FF0000U) >> 08) | ((value & 0xFF000000U) >> 24); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary> | |
/// Check yourself on <seealso href="http://murmurhash.shorelabs.com/"/> | |
/// </summary> | |
public class MurmurHash3Tests | |
{ | |
[Theory] | |
[MemberData(nameof(ComputeHash32_Input))] | |
public void ComputeHash32(ReadOnlyMemory<byte> input, uint expected) | |
{ | |
var hasher = new MurmurHash3(); | |
var result = hasher.ComputeHash32(input.Span); | |
Assert.Equal(expected, result); | |
} | |
public static IEnumerable<object[]> ComputeHash32_Input() => new List<object[]> { | |
new object[]{ Encoding.UTF8.GetBytes("foo").AsMemory<byte>(), 4138058784 }, | |
new object[]{ Encoding.UTF8.GetBytes("bar").AsMemory<byte>(), 1158584717 }, | |
}; | |
[Theory] | |
[MemberData(nameof(ComputeHash128_Input))] | |
public void ComputeHash128(ReadOnlyMemory<byte> input, string expected) | |
{ | |
var hasher = new MurmurHash3(); | |
var bytes = hasher.ComputeHash128(input.Span); | |
var hash = string.Concat(bytes.Select(x => x.ToString("x2"))); | |
Assert.Equal(expected, hash); | |
} | |
public static IEnumerable<object[]> ComputeHash128_Input() => new List<object[]> { | |
new object[]{ Encoding.UTF8.GetBytes("foo").AsMemory<byte>(), "e271865701f545617eaf87e42bba7d87" }, | |
new object[]{ Encoding.UTF8.GetBytes("bar").AsMemory<byte>(), "923658dbfd3ae604244fd74548bc56c0" }, | |
}; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment