hash: Add 128-bit murmurhash.

author Joe Stringer <joestringer@nicira.com>

Mon, 11 Aug 2014 23:12:12 +0000 (11:12 +1200)

committer Joe Stringer <joestringer@nicira.com>

Tue, 25 Nov 2014 22:12:24 +0000 (14:12 -0800)
author Joe Stringer <joestringer@nicira.com>
Mon, 11 Aug 2014 23:12:12 +0000 (11:12 +1200)
committer Joe Stringer <joestringer@nicira.com>
Tue, 25 Nov 2014 22:12:24 +0000 (14:12 -0800)
diff --git a/include/openvswitch/types.h b/include/openvswitch/types.h

index 54541a42b2b9b3d981c01ea476f96b3e33586988..2afb7b71f6e5709391e286232714c3ba1048beea 100644 (file)
--- a/include/openvswitch/types.h
+++ b/include/openvswitch/types.h
@@ -81,6 +81,20 @@ typedef struct {
  #endif
  } ovs_32aligned_u64;
  
+/* A 128-bit value that can be viewed either as four 32-bit words ('u32') or
+ * as two 64-bit halves ('u64.lo' and 'u64.hi').  Both views share the same
+ * storage, so which 'u32' elements overlay 'u64.lo' depends on the host byte
+ * order. */
+typedef union {
+    uint32_t u32[4];
+    struct {
+        uint64_t lo, hi;
+    } u64;
+} ovs_u128;
+
+/* Compares the 128-bit values at 'a' and 'b'.  Returns 1 if both 64-bit
+ * halves match, otherwise 0. */
+static inline int
+ovs_u128_equal(const ovs_u128 *a, const ovs_u128 *b)
+{
+    if (a->u64.lo != b->u64.lo) {
+        return 0;
+    }
+    return a->u64.hi == b->u64.hi;
+}
+
  /* A 64-bit value, in network byte order, that is only aligned on a 32-bit
   * boundary. */
  typedef struct {
diff --git a/lib/hash.c b/lib/hash.c

index 71cd74c8f69fc37713240ba135e5bf754ef3c2ad..1042c97f643a61541976bd6f7c40d50ba8413dc3 100644 (file)
--- a/lib/hash.c
+++ b/lib/hash.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2012, 2013, 2014 Nicira, Inc.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -71,3 +71,267 @@ hash_words64__(const uint64_t p[], size_t n_words, uint64_t basis)
  {
      return hash_words64_inline(p, n_words, basis);
  }
+
+#if !(defined(__x86_64__))
+/* Computes a 128-bit hash of the 'len' bytes at 'p_', seeded with 'basis',
+ * and stores it in '*out' as four 32-bit words (out->u32[0..3]).
+ *
+ * This is the variant built when __x86_64__ is not defined.  It keeps four
+ * 32-bit lanes of state (h1..h4), all initialized to 'basis', consumes the
+ * input 16 bytes at a time through get_unaligned_u32(), folds in the
+ * remaining 1..15 bytes, and finishes every lane with mhash_finish(). */
+void
+hash_bytes128(const void *p_, size_t len, uint32_t basis, ovs_u128 *out)
+{
+    const uint32_t c1 = 0x239b961b;
+    const uint32_t c2 = 0xab0e9789;
+    const uint32_t c3 = 0x38b34ae5;
+    const uint32_t c4 = 0xa1e38b93;
+    const uint8_t *tail, *data = (const uint8_t *)p_;
+    const uint32_t *blocks = (const uint32_t *)p_;
+    /* size_t rather than int, so that 'nblocks * 16' and 'i * 4' cannot
+     * overflow for very large inputs. */
+    const size_t nblocks = len / 16;
+    uint32_t h1 = basis;
+    uint32_t h2 = basis;
+    uint32_t h3 = basis;
+    uint32_t h4 = basis;
+    uint32_t k1, k2, k3, k4;
+
+    /* Body */
+    for (size_t i = 0; i < nblocks; i++) {
+        k1 = get_unaligned_u32(&blocks[i * 4 + 0]);
+        k2 = get_unaligned_u32(&blocks[i * 4 + 1]);
+        k3 = get_unaligned_u32(&blocks[i * 4 + 2]);
+        k4 = get_unaligned_u32(&blocks[i * 4 + 3]);
+
+        k1 *= c1;
+        k1 = hash_rot(k1, 15);
+        k1 *= c2;
+        h1 ^= k1;
+
+        h1 = hash_rot(h1, 19);
+        h1 += h2;
+        h1 = h1 * 5 + 0x561ccd1b;
+
+        k2 *= c2;
+        k2 = hash_rot(k2, 16);
+        k2 *= c3;
+        h2 ^= k2;
+
+        h2 = hash_rot(h2, 17);
+        h2 += h3;
+        h2 = h2 * 5 + 0x0bcaa747;
+
+        k3 *= c3;
+        k3 = hash_rot(k3, 17);
+        k3 *= c4;
+        h3 ^= k3;
+
+        h3 = hash_rot(h3, 15);
+        h3 += h4;
+        h3 = h3 * 5 + 0x96cd1c35;
+
+        k4 *= c4;
+        k4 = hash_rot(k4, 18);
+        k4 *= c1;
+        h4 ^= k4;
+
+        h4 = hash_rot(h4, 13);
+        h4 += h1;
+        h4 = h4 * 5 + 0x32ac3b17;
+    }
+
+    /* Tail
+     *
+     * Every case deliberately falls through to the next one.  Each byte is
+     * widened to uint32_t before shifting: a uint8_t would otherwise be
+     * promoted to (signed) int, and shifting a byte >= 0x80 left by 24 does
+     * not fit in an int. */
+    k1 = k2 = k3 = k4 = 0;
+    tail = data + nblocks * 16;
+    switch (len & 15) {
+    case 15:
+        k4 ^= ((uint32_t) tail[14]) << 16;
+    case 14:
+        k4 ^= ((uint32_t) tail[13]) << 8;
+    case 13:
+        k4 ^= ((uint32_t) tail[12]) << 0;
+        k4 *= c4;
+        k4 = hash_rot(k4, 18);
+        k4 *= c1;
+        h4 ^= k4;
+
+    case 12:
+        k3 ^= ((uint32_t) tail[11]) << 24;
+    case 11:
+        k3 ^= ((uint32_t) tail[10]) << 16;
+    case 10:
+        k3 ^= ((uint32_t) tail[9]) << 8;
+    case 9:
+        k3 ^= ((uint32_t) tail[8]) << 0;
+        k3 *= c3;
+        k3 = hash_rot(k3, 17);
+        k3 *= c4;
+        h3 ^= k3;
+
+    case 8:
+        k2 ^= ((uint32_t) tail[7]) << 24;
+    case 7:
+        k2 ^= ((uint32_t) tail[6]) << 16;
+    case 6:
+        k2 ^= ((uint32_t) tail[5]) << 8;
+    case 5:
+        k2 ^= ((uint32_t) tail[4]) << 0;
+        k2 *= c2;
+        k2 = hash_rot(k2, 16);
+        k2 *= c3;
+        h2 ^= k2;
+
+    case 4:
+        k1 ^= ((uint32_t) tail[3]) << 24;
+    case 3:
+        k1 ^= ((uint32_t) tail[2]) << 16;
+    case 2:
+        k1 ^= ((uint32_t) tail[1]) << 8;
+    case 1:
+        k1 ^= ((uint32_t) tail[0]) << 0;
+        k1 *= c1;
+        k1 = hash_rot(k1, 15);
+        k1 *= c2;
+        h1 ^= k1;
+    }
+
+    /* Finalization */
+    h1 ^= len;
+    h2 ^= len;
+    h3 ^= len;
+    h4 ^= len;
+
+    h1 += h2;
+    h1 += h3;
+    h1 += h4;
+    h2 += h1;
+    h3 += h1;
+    h4 += h1;
+
+    h1 = mhash_finish(h1);
+    h2 = mhash_finish(h2);
+    h3 = mhash_finish(h3);
+    h4 = mhash_finish(h4);
+
+    h1 += h2;
+    h1 += h3;
+    h1 += h4;
+    h2 += h1;
+    h3 += h1;
+    h4 += h1;
+
+    out->u32[0] = h1;
+    out->u32[1] = h2;
+    out->u32[2] = h3;
+    out->u32[3] = h4;
+}
+
+#else /* __x86_64__ */
+
+/* Rotates 'x' left by 'r' bits and returns the result.
+ *
+ * 'r' must be in the range 1..63: with r == 0 the right shift below would be
+ * by 64 bits, which C leaves undefined for a 64-bit operand. */
+static inline uint64_t
+hash_rot64(uint64_t x, int8_t r)
+{
+    return (x << r) | (x >> (64 - r));
+}
+
+/* Scrambles the 64-bit value 'k' with an alternating sequence of
+ * xor-with-right-shift and multiply-by-constant steps, and returns the
+ * result.  hash_bytes128() applies this to each half of its state during
+ * finalization. */
+static inline uint64_t
+fmix64(uint64_t k)
+{
+    k ^= k >> 33;
+    k *= 0xff51afd7ed558ccdULL;
+    k ^= k >> 33;
+    k *= 0xc4ceb9fe1a85ec53ULL;
+    k ^= k >> 33;
+
+    return k;
+}
+
+/* Computes a 128-bit hash of the 'len' bytes at 'p_', seeded with 'basis',
+ * and stores it in '*out' (low half in out->u64.lo, high half in
+ * out->u64.hi).
+ *
+ * This is the variant built when __x86_64__ is defined.  It keeps two 64-bit
+ * lanes of state (h1, h2), both initialized to 'basis', consumes the input
+ * 16 bytes at a time through get_unaligned_u64(), folds in the remaining
+ * 1..15 bytes, and finishes both lanes with fmix64(). */
+void
+hash_bytes128(const void *p_, size_t len, uint32_t basis, ovs_u128 *out)
+{
+    const uint64_t c1 = 0x87c37b91114253d5ULL;
+    const uint64_t c2 = 0x4cf5ad432745937fULL;
+    const uint8_t *tail, *data = (const uint8_t *)p_;
+    const uint64_t *blocks = (const uint64_t *)p_;
+    /* size_t rather than int, so that 'nblocks * 16' and 'i * 2' cannot
+     * overflow when 'len' is 2 GiB or more. */
+    const size_t nblocks = len / 16;
+    uint64_t h1 = basis;
+    uint64_t h2 = basis;
+    uint64_t k1, k2;
+
+    /* Body */
+    for (size_t i = 0; i < nblocks; i++) {
+        k1 = get_unaligned_u64(&blocks[i * 2 + 0]);
+        k2 = get_unaligned_u64(&blocks[i * 2 + 1]);
+
+        k1 *= c1;
+        k1 = hash_rot64(k1, 31);
+        k1 *= c2;
+        h1 ^= k1;
+
+        h1 = hash_rot64(h1, 27);
+        h1 += h2;
+        h1 = h1 * 5 + 0x52dce729;
+
+        k2 *= c2;
+        k2 = hash_rot64(k2, 33);
+        k2 *= c1;
+        h2 ^= k2;
+
+        h2 = hash_rot64(h2, 31);
+        h2 += h1;
+        h2 = h2 * 5 + 0x38495ab5;
+    }
+
+    /* Tail
+     *
+     * Every case deliberately falls through to the next one, so that a tail
+     * of N bytes picks up bytes N-1 down to 0. */
+    k1 = 0;
+    k2 = 0;
+    tail = data + nblocks * 16;
+    switch (len & 15) {
+    case 15:
+        k2 ^= ((uint64_t) tail[14]) << 48;
+    case 14:
+        k2 ^= ((uint64_t) tail[13]) << 40;
+    case 13:
+        k2 ^= ((uint64_t) tail[12]) << 32;
+    case 12:
+        k2 ^= ((uint64_t) tail[11]) << 24;
+    case 11:
+        k2 ^= ((uint64_t) tail[10]) << 16;
+    case 10:
+        k2 ^= ((uint64_t) tail[9]) << 8;
+    case 9:
+        k2 ^= ((uint64_t) tail[8]) << 0;
+        k2 *= c2;
+        k2 = hash_rot64(k2, 33);
+        k2 *= c1;
+        h2 ^= k2;
+
+    case 8:
+        k1 ^= ((uint64_t) tail[7]) << 56;
+    case 7:
+        k1 ^= ((uint64_t) tail[6]) << 48;
+    case 6:
+        k1 ^= ((uint64_t) tail[5]) << 40;
+    case 5:
+        k1 ^= ((uint64_t) tail[4]) << 32;
+    case 4:
+        k1 ^= ((uint64_t) tail[3]) << 24;
+    case 3:
+        k1 ^= ((uint64_t) tail[2]) << 16;
+    case 2:
+        k1 ^= ((uint64_t) tail[1]) << 8;
+    case 1:
+        k1 ^= ((uint64_t) tail[0]) << 0;
+        k1 *= c1;
+        k1 = hash_rot64(k1, 31);
+        k1 *= c2;
+        h1 ^= k1;
+    }
+
+    /* Finalization */
+    h1 ^= len;
+    h2 ^= len;
+    h1 += h2;
+    h2 += h1;
+    h1 = fmix64(h1);
+    h2 = fmix64(h2);
+    h1 += h2;
+    h2 += h1;
+
+    out->u64.lo = h1;
+    out->u64.hi = h2;
+}
+#endif /* __x86_64__ */
diff --git a/lib/hash.h b/lib/hash.h

index 6d3affca84fb581bc898b8a0617c16dd475b28e5..c2820dd5ed9136a440c5256089f5f272d7751314 100644 (file)
--- a/lib/hash.h
+++ b/lib/hash.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2012, 2013, 2014 Nicira, Inc.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -33,6 +33,8 @@ hash_rot(uint32_t x, int k)
  }
  
  uint32_t hash_bytes(const void *, size_t n_bytes, uint32_t basis);
+void hash_bytes128(const void *_, size_t n_bytes, uint32_t basis,
+                   ovs_u128 *out);
  
  static inline uint32_t hash_int(uint32_t x, uint32_t basis);
  static inline uint32_t hash_2words(uint32_t, uint32_t);
@@ -72,9 +74,8 @@ static inline uint32_t mhash_add(uint32_t hash, uint32_t data)
      return hash * 5 + 0xe6546b64;
  }
  
-static inline uint32_t mhash_finish(uint32_t hash, uint32_t n_bytes)
+static inline uint32_t mhash_finish(uint32_t hash)
  {
-    hash ^= n_bytes;
      hash ^= hash >> 16;
      hash *= 0x85ebca6b;
      hash ^= hash >> 13;
@@ -84,7 +85,7 @@ static inline uint32_t mhash_finish(uint32_t hash, uint32_t n_bytes)
  }
  
  #if !(defined(__SSE4_2__) && defined(__x86_64__))
-/* Mhash-based implemantation. */
+/* Mhash-based implementation. */
  
  static inline uint32_t hash_add(uint32_t hash, uint32_t data)
  {
@@ -93,7 +94,7 @@ static inline uint32_t hash_add(uint32_t hash, uint32_t data)
  
  static inline uint32_t hash_finish(uint32_t hash, uint32_t final)
  {
-    return mhash_finish(hash, final);
+    return mhash_finish(hash ^ final);
  }
  
  /* Returns the hash of the 'n' 32-bit words at 'p', starting from 'basis'.
diff --git a/tests/test-hash.c b/tests/test-hash.c

index 35b23e936e56eca1e8d6bbbc8d68ae66145badd3..d7e2e6b6e1c987899f7d75bbf1a03165f811b748 100644 (file)
--- a/tests/test-hash.c
+++ b/tests/test-hash.c
@@ -35,6 +35,22 @@ set_bit(uint32_t array[3], int bit)
      }
  }
  
+/* Clears all 16 elements of 'array' and then, unless 'bit' is 2048, sets the
+ * single bit numbered 'bit'.  Bits are numbered across the whole array: each
+ * element holds 128 of them, the first 64 in 'u64.lo' and the next 64 in
+ * 'u64.hi'.  Passing 2048 leaves the array all-zero. */
+static void
+set_bit128(ovs_u128 array[16], int bit)
+{
+    assert(bit >= 0 && bit <= 2048);
+    memset(array, 0, sizeof(ovs_u128) * 16);
+    if (bit >= 2048) {
+        return;
+    }
+
+    const int offset = bit % 128;
+    const uint64_t mask = UINT64_C(1) << (offset % 64);
+    ovs_u128 *word = &array[bit / 128];
+
+    if (offset >= 64) {
+        word->u64.hi = mask;
+    } else {
+        word->u64.lo = mask;
+    }
+}
+
  static uint32_t
  hash_words_cb(uint32_t input)
  {
@@ -53,6 +69,15 @@ hash_int_cb(uint32_t input)
      return hash_int(input, 0);
  }
  
+/* Adapter that gives hash_bytes128() the uint32_t -> uint32_t shape expected
+ * by check_word_hash(): hashes the bytes of 'input' with basis 0 and returns
+ * the low 32 bits of the low 64-bit half of the result. */
+static uint32_t
+hash_bytes128_cb(uint32_t input)
+{
+    ovs_u128 result;
+
+    hash_bytes128(&input, sizeof input, 0, &result);
+
+    return (uint32_t) result.u64.lo;
+}
+
  static void
  check_word_hash(uint32_t (*hash)(uint32_t), const char *name,
                  int min_unique)
@@ -118,6 +143,48 @@ check_3word_hash(uint32_t (*hash)(const uint32_t[], size_t, uint32_t),
      }
  }
  
+/* Checks 'hash', a 128-bit hash function called 'name' in messages, over
+ * 256-byte inputs that have exactly one bit set.
+ *
+ * For every pair of bit positions i < j in [0, 2048) this verifies that:
+ *
+ *   - Hashing the input with bit i set yields the same value whether the
+ *     buffer is an ordinary ovs_u128 array or the array that follows a
+ *     uint32_t inside an OVS_PACKED struct (presumably leaving it at a
+ *     4-byte offset, so the hash sees differently aligned data).
+ *
+ *   - The hashes of the inputs with bit i and with bit j set differ
+ *     somewhere in the low 'min_unique' bits of their low 64-bit half.
+ *
+ * Problems are reported with printf(); nothing is returned.  The all-zero
+ * input that set_bit128() produces for bit 2048 is not exercised here. */
+static void
+check_256byte_hash(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *),
+                   const char *name, const int min_unique)
+{
+    const uint64_t unique_mask = (UINT64_C(1) << min_unique) - 1;
+    const int n_bits = 256 * 8;
+    int i, j;
+
+    for (i = 0; i < n_bits; i++) {
+        for (j = i + 1; j < n_bits; j++) {
+            OVS_PACKED(struct offset_ovs_u128 {
+                uint32_t a;
+                ovs_u128 b[16];
+            }) in0_data;
+            ovs_u128 *in0, in1[16], in2[16];
+            ovs_u128 out0, out1, out2;
+
+            in0 = in0_data.b;
+            set_bit128(in0, i);
+            set_bit128(in1, i);
+            set_bit128(in2, j);
+            hash(in0, sizeof(ovs_u128) * 16, 0, &out0);
+            hash(in1, sizeof(ovs_u128) * 16, 0, &out1);
+            hash(in2, sizeof(ovs_u128) * 16, 0, &out2);
+            if (!ovs_u128_equal(&out0, &out1)) {
+                /* Print each 128-bit value high half first, matching the
+                 * collision report below. */
+                printf("%s hash not the same for non-64 aligned data "
+                       "%016"PRIx64"%016"PRIx64" != %016"PRIx64"%016"PRIx64"\n",
+                       name, out0.u64.hi, out0.u64.lo, out1.u64.hi, out1.u64.lo);
+            }
+            if ((out1.u64.lo & unique_mask) == (out2.u64.lo & unique_mask)) {
+                printf("%s has a partial collision:\n", name);
+                printf("hash(1 << %4d) == %016"PRIx64"%016"PRIx64"\n", i,
+                       out1.u64.hi, out1.u64.lo);
+                printf("hash(1 << %4d) == %016"PRIx64"%016"PRIx64"\n", j,
+                       out2.u64.hi, out2.u64.lo);
+                printf("The low-order %d bits of output are both "
+                       "0x%"PRIx64"\n", min_unique, out1.u64.lo & unique_mask);
+            }
+        }
+    }
+}
+
  static void
  test_hash_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
  {
@@ -176,6 +243,22 @@ test_hash_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
       * function.
       */
      check_word_hash(hash_int_cb, "hash_int", 12);
+    check_word_hash(hash_bytes128_cb, "hash_bytes128", 12);
+
+    /* Check that all hashes computed with hash_bytes128 with exactly one
+     * 1-bit set within 16 128-bit words have different values in their
+     * lowest 23 bits.
+     *
+     * Given a random distribution, the probability of at least one collision
+     * in any set of 23 bits is approximately
+     *
+     *                      1 - ((2**23 - 1)/2**23)**C(2048,2)
+     *                   == 1 - (8,388,607/8,388,608)**2,096,128
+     *                   =~ 0.22
+     *
+     * so we are doing pretty well to not have any collisions in 23 bits.
+     */
+    check_256byte_hash(hash_bytes128, "hash_bytes128", 23);
  }
  
  OVSTEST_REGISTER("test-hash", test_hash_main);
author	Joe Stringer <joestringer@nicira.com>
	Mon, 11 Aug 2014 23:12:12 +0000 (11:12 +1200)
committer	Joe Stringer <joestringer@nicira.com>
	Tue, 25 Nov 2014 22:12:24 +0000 (14:12 -0800)
include/openvswitch/types.h		patch \| blob \| blame \| history
lib/hash.c		patch \| blob \| blame \| history
lib/hash.h		patch \| blob \| blame \| history
tests/test-hash.c		patch \| blob \| blame \| history