farmhashcc.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. package farm
  2. // This file provides a 32-bit hash equivalent to CityHash32 (v1.1.1)
  3. // and a 128-bit hash equivalent to CityHash128 (v1.1.1). It also provides
  4. // a seeded 32-bit hash function similar to CityHash32.
  5. func hash32Len13to24Seed(s []byte, seed uint32) uint32 {
  6. slen := len(s)
  7. a := fetch32(s, -4+(slen>>1))
  8. b := fetch32(s, 4)
  9. c := fetch32(s, slen-8)
  10. d := fetch32(s, (slen >> 1))
  11. e := fetch32(s, 0)
  12. f := fetch32(s, slen-4)
  13. h := d*c1 + uint32(slen) + seed
  14. a = rotate32(a, 12) + f
  15. h = mur(c, h) + a
  16. a = rotate32(a, 3) + c
  17. h = mur(e, h) + a
  18. a = rotate32(a+f, 12) + d
  19. h = mur(b^seed, h) + a
  20. return fmix(h)
  21. }
  22. func hash32Len0to4(s []byte, seed uint32) uint32 {
  23. slen := len(s)
  24. b := seed
  25. c := uint32(9)
  26. for i := 0; i < slen; i++ {
  27. v := uint32(s[i])
  28. b = (b * c1) + v
  29. c ^= b
  30. }
  31. return fmix(mur(b, mur(uint32(slen), c)))
  32. }
  33. func hash128to64(x uint128) uint64 {
  34. // Murmur-inspired hashing.
  35. const mul uint64 = 0x9ddfea08eb382d69
  36. a := (x.lo ^ x.hi) * mul
  37. a ^= (a >> 47)
  38. b := (x.hi ^ a) * mul
  39. b ^= (b >> 47)
  40. b *= mul
  41. return b
  42. }
  43. type uint128 struct {
  44. lo uint64
  45. hi uint64
  46. }
  47. // A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
  48. // of any length representable in signed long. Based on City and Murmur.
  49. func cityMurmur(s []byte, seed uint128) uint128 {
  50. slen := len(s)
  51. a := seed.lo
  52. b := seed.hi
  53. var c uint64
  54. var d uint64
  55. l := slen - 16
  56. if l <= 0 { // len <= 16
  57. a = shiftMix(a*k1) * k1
  58. c = b*k1 + hashLen0to16(s)
  59. if slen >= 8 {
  60. d = shiftMix(a + fetch64(s, 0))
  61. } else {
  62. d = shiftMix(a + c)
  63. }
  64. } else { // len > 16
  65. c = hashLen16(fetch64(s, slen-8)+k1, a)
  66. d = hashLen16(b+uint64(slen), c+fetch64(s, slen-16))
  67. a += d
  68. for {
  69. a ^= shiftMix(fetch64(s, 0)*k1) * k1
  70. a *= k1
  71. b ^= a
  72. c ^= shiftMix(fetch64(s, 8)*k1) * k1
  73. c *= k1
  74. d ^= c
  75. s = s[16:]
  76. l -= 16
  77. if l <= 0 {
  78. break
  79. }
  80. }
  81. }
  82. a = hashLen16(a, c)
  83. b = hashLen16(d, b)
  84. return uint128{a ^ b, hashLen16(b, a)}
  85. }
  86. func cityHash128WithSeed(s []byte, seed uint128) uint128 {
  87. slen := len(s)
  88. if slen < 128 {
  89. return cityMurmur(s, seed)
  90. }
  91. endIdx := ((slen - 1) / 128) * 128
  92. lastBlockIdx := endIdx + ((slen - 1) & 127) - 127
  93. last := s[lastBlockIdx:]
  94. // We expect len >= 128 to be the common case. Keep 56 bytes of state:
  95. // v, w, x, y, and z.
  96. var v1, v2 uint64
  97. var w1, w2 uint64
  98. x := seed.lo
  99. y := seed.hi
  100. z := uint64(slen) * k1
  101. v1 = rotate64(y^k1, 49)*k1 + fetch64(s, 0)
  102. v2 = rotate64(v1, 42)*k1 + fetch64(s, 8)
  103. w1 = rotate64(y+z, 35)*k1 + x
  104. w2 = rotate64(x+fetch64(s, 88), 53) * k1
  105. // This is the same inner loop as CityHash64(), manually unrolled.
  106. for {
  107. x = rotate64(x+y+v1+fetch64(s, 8), 37) * k1
  108. y = rotate64(y+v2+fetch64(s, 48), 42) * k1
  109. x ^= w2
  110. y += v1 + fetch64(s, 40)
  111. z = rotate64(z+w1, 33) * k1
  112. v1, v2 = weakHashLen32WithSeeds(s, v2*k1, x+w1)
  113. w1, w2 = weakHashLen32WithSeeds(s[32:], z+w2, y+fetch64(s, 16))
  114. z, x = x, z
  115. s = s[64:]
  116. x = rotate64(x+y+v1+fetch64(s, 8), 37) * k1
  117. y = rotate64(y+v2+fetch64(s, 48), 42) * k1
  118. x ^= w2
  119. y += v1 + fetch64(s, 40)
  120. z = rotate64(z+w1, 33) * k1
  121. v1, v2 = weakHashLen32WithSeeds(s, v2*k1, x+w1)
  122. w1, w2 = weakHashLen32WithSeeds(s[32:], z+w2, y+fetch64(s, 16))
  123. z, x = x, z
  124. s = s[64:]
  125. slen -= 128
  126. if slen < 128 {
  127. break
  128. }
  129. }
  130. x += rotate64(v1+z, 49) * k0
  131. y = y*k0 + rotate64(w2, 37)
  132. z = z*k0 + rotate64(w1, 27)
  133. w1 *= 9
  134. v1 *= k0
  135. // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
  136. for tailDone := 0; tailDone < slen; {
  137. tailDone += 32
  138. y = rotate64(x+y, 42)*k0 + v2
  139. w1 += fetch64(last, 128-tailDone+16)
  140. x = x*k0 + w1
  141. z += w2 + fetch64(last, 128-tailDone)
  142. w2 += v1
  143. v1, v2 = weakHashLen32WithSeeds(last[128-tailDone:], v1+z, v2)
  144. v1 *= k0
  145. }
  146. // At this point our 56 bytes of state should contain more than
  147. // enough information for a strong 128-bit hash. We use two
  148. // different 56-byte-to-8-byte hashes to get a 16-byte final result.
  149. x = hashLen16(x, v1)
  150. y = hashLen16(y+z, w1)
  151. return uint128{hashLen16(x+v2, w2) + y,
  152. hashLen16(x+w2, y+v2)}
  153. }
  154. func cityHash128(s []byte) uint128 {
  155. slen := len(s)
  156. if slen >= 16 {
  157. return cityHash128WithSeed(s[16:], uint128{fetch64(s, 0), fetch64(s, 8) + k0})
  158. }
  159. return cityHash128WithSeed(s, uint128{k0, k1})
  160. }
  161. // Fingerprint128 is a 128-bit fingerprint function for byte-slices
  162. func Fingerprint128(s []byte) (lo, hi uint64) {
  163. h := cityHash128(s)
  164. return h.lo, h.hi
  165. }
  166. // Fingerprint64 is a 64-bit fingerprint function for byte-slices
  167. func Fingerprint64(s []byte) uint64 {
  168. return Hash64(s)
  169. }
  170. // Fingerprint32 is a 32-bit fingerprint function for byte-slices
  171. func Fingerprint32(s []byte) uint32 {
  172. return Hash32(s)
  173. }
  174. // Hash128 is a 128-bit hash function for byte-slices
  175. func Hash128(s []byte) (lo, hi uint64) {
  176. return Fingerprint128(s)
  177. }
  178. // Hash128WithSeed is a 128-bit hash function for byte-slices and a 128-bit seed
  179. func Hash128WithSeed(s []byte, seed0, seed1 uint64) (lo, hi uint64) {
  180. h := cityHash128WithSeed(s, uint128{seed0, seed1})
  181. return h.lo, h.hi
  182. }