]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/rocksdb/util/hash_test.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / util / hash_test.cc
index 08fcaf574a45422004f17d91d07631053f06f4d3..72112b0448136d440b45c297353ca1c700367857 100644 (file)
 #include "util/hash.h"
 
 #include <cstring>
+#include <type_traits>
 #include <vector>
 
 #include "test_util/testharness.h"
 #include "util/coding.h"
+#include "util/coding_lean.h"
+#include "util/hash128.h"
+#include "util/math.h"
 #include "util/math128.h"
 
+using ROCKSDB_NAMESPACE::BijectiveHash2x64;
+using ROCKSDB_NAMESPACE::BijectiveUnhash2x64;
+using ROCKSDB_NAMESPACE::DecodeFixed64;
 using ROCKSDB_NAMESPACE::EncodeFixed32;
+using ROCKSDB_NAMESPACE::EndianSwapValue;
 using ROCKSDB_NAMESPACE::GetSliceHash64;
 using ROCKSDB_NAMESPACE::Hash;
+using ROCKSDB_NAMESPACE::Hash128;
+using ROCKSDB_NAMESPACE::Hash2x64;
 using ROCKSDB_NAMESPACE::Hash64;
 using ROCKSDB_NAMESPACE::Lower32of64;
+using ROCKSDB_NAMESPACE::Lower64of128;
+using ROCKSDB_NAMESPACE::ReverseBits;
 using ROCKSDB_NAMESPACE::Slice;
+using ROCKSDB_NAMESPACE::Unsigned128;
 using ROCKSDB_NAMESPACE::Upper32of64;
+using ROCKSDB_NAMESPACE::Upper64of128;
 
 // The hash algorithm is part of the file format, for example for the Bloom
 // filters. Test that the hash values are stable for a set of random strings of
@@ -93,7 +107,8 @@ TEST(HashTest, Hash64Misc) {
     for (size_t size = 0; size <= max_size; ++size) {
       uint64_t here = Hash64(str.data(), size, kSeed);
 
-      // Must be same as GetSliceHash64
+      // Must be same as unseeded Hash64 and GetSliceHash64
+      EXPECT_EQ(here, Hash64(str.data(), size));
       EXPECT_EQ(here, GetSliceHash64(Slice(str.data(), size)));
 
       // Upper and Lower must reconstruct hash
@@ -234,7 +249,7 @@ std::string Hash64TestDescriptor(const char *repeat, size_t limit) {
   return rv;
 }
 
-// XXH3p changes its algorithm for various sizes up through 250 bytes, so
+// XXPH3 changes its algorithm for various sizes up through 250 bytes, so
 // we need to check the stability of larger sizes also.
 TEST(HashTest, Hash64LargeValueSchema) {
   // Each of these derives a "descriptor" from the hash values for all
@@ -267,6 +282,162 @@ TEST(HashTest, Hash64LargeValueSchema) {
       "eMFlxCIYUpTCsal2qsmnGOWa8WCcefrohMjDj1fjzSvSaQwlpyR1GZHF2uPOoQagiCpHpm");
 }
 
+TEST(HashTest, Hash128Misc) {
+  constexpr uint32_t kSeed = 0;  // Same as GetSliceHash128
+
+  for (char fill : {'\0', 'a', '1', '\xff', 'e'}) {
+    const size_t max_size = 1000;
+    std::string str(max_size, fill);
+
+    if (fill == 'e') {
+      // Use different characters to check endianness handling
+      for (size_t i = 0; i < str.size(); ++i) {
+        str[i] += static_cast<char>(i);
+      }
+    }
+
+    for (size_t size = 0; size <= max_size; ++size) {
+      Unsigned128 here = Hash128(str.data(), size, kSeed);
+
+      // Must be same as unseeded Hash128 and GetSliceHash128
+      EXPECT_EQ(here, Hash128(str.data(), size));
+      EXPECT_EQ(here, GetSliceHash128(Slice(str.data(), size)));
+      {
+        uint64_t hi, lo;
+        Hash2x64(str.data(), size, &hi, &lo);
+        EXPECT_EQ(Lower64of128(here), lo);
+        EXPECT_EQ(Upper64of128(here), hi);
+      }
+      if (size == 16) {
+        const uint64_t in_hi = DecodeFixed64(str.data() + 8);
+        const uint64_t in_lo = DecodeFixed64(str.data());
+        uint64_t hi, lo;
+        BijectiveHash2x64(in_hi, in_lo, &hi, &lo);
+        EXPECT_EQ(Lower64of128(here), lo);
+        EXPECT_EQ(Upper64of128(here), hi);
+        uint64_t un_hi, un_lo;
+        BijectiveUnhash2x64(hi, lo, &un_hi, &un_lo);
+        EXPECT_EQ(in_lo, un_lo);
+        EXPECT_EQ(in_hi, un_hi);
+      }
+
+      // Upper and Lower must reconstruct hash
+      EXPECT_EQ(here,
+                (Unsigned128{Upper64of128(here)} << 64) | Lower64of128(here));
+      EXPECT_EQ(here,
+                (Unsigned128{Upper64of128(here)} << 64) ^ Lower64of128(here));
+
+      // Seed changes hash value (with high probability)
+      for (uint64_t var_seed = 1; var_seed != 0; var_seed <<= 1) {
+        Unsigned128 seeded = Hash128(str.data(), size, var_seed);
+        EXPECT_NE(here, seeded);
+        // Must match seeded Hash2x64
+        {
+          uint64_t hi, lo;
+          Hash2x64(str.data(), size, var_seed, &hi, &lo);
+          EXPECT_EQ(Lower64of128(seeded), lo);
+          EXPECT_EQ(Upper64of128(seeded), hi);
+        }
+        if (size == 16) {
+          const uint64_t in_hi = DecodeFixed64(str.data() + 8);
+          const uint64_t in_lo = DecodeFixed64(str.data());
+          uint64_t hi, lo;
+          BijectiveHash2x64(in_hi, in_lo, var_seed, &hi, &lo);
+          EXPECT_EQ(Lower64of128(seeded), lo);
+          EXPECT_EQ(Upper64of128(seeded), hi);
+          uint64_t un_hi, un_lo;
+          BijectiveUnhash2x64(hi, lo, var_seed, &un_hi, &un_lo);
+          EXPECT_EQ(in_lo, un_lo);
+          EXPECT_EQ(in_hi, un_hi);
+        }
+      }
+
+      // Size changes hash value (with high probability)
+      size_t max_smaller_by = std::min(size_t{30}, size);
+      for (size_t smaller_by = 1; smaller_by <= max_smaller_by; ++smaller_by) {
+        EXPECT_NE(here, Hash128(str.data(), size - smaller_by, kSeed));
+      }
+    }
+  }
+}
+
+// Test that hash values are "non-trivial" for "trivial" inputs
+TEST(HashTest, Hash128Trivial) {
+  // Thorough test too slow for regression testing
+  constexpr bool thorough = false;
+
+  // For various seeds, make sure hash of empty string is not zero.
+  constexpr uint64_t max_seed = thorough ? 0x1000000 : 0x10000;
+  for (uint64_t seed = 0; seed < max_seed; ++seed) {
+    Unsigned128 here = Hash128("", 0, seed);
+    EXPECT_NE(Lower64of128(here), 0u);
+    EXPECT_NE(Upper64of128(here), 0u);
+  }
+
+  // For standard seed, make sure hash of small strings are not zero
+  constexpr uint32_t kSeed = 0;  // Same as GetSliceHash128
+  char input[4];
+  constexpr int max_len = thorough ? 3 : 2;
+  for (int len = 1; len <= max_len; ++len) {
+    for (uint32_t i = 0; (i >> (len * 8)) == 0; ++i) {
+      EncodeFixed32(input, i);
+      Unsigned128 here = Hash128(input, len, kSeed);
+      EXPECT_NE(Lower64of128(here), 0u);
+      EXPECT_NE(Upper64of128(here), 0u);
+    }
+  }
+}
+
+std::string Hash128TestDescriptor(const char *repeat, size_t limit) {
+  const char *mod61_encode =
+      "abcdefghijklmnopqrstuvwxyz123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+  std::string input;
+  while (input.size() < limit) {
+    input.append(repeat);
+  }
+  std::string rv;
+  for (size_t i = 0; i < limit; ++i) {
+    auto h = GetSliceHash128(Slice(input.data(), i));
+    uint64_t h2 = Upper64of128(h) + Lower64of128(h);
+    rv.append(1, mod61_encode[static_cast<size_t>(h2 % 61)]);
+  }
+  return rv;
+}
+
+// XXH3 changes its algorithm for various sizes up through 250 bytes, so
+// we need to check the stability of larger sizes also.
+TEST(HashTest, Hash128ValueSchema) {
+  // Each of these derives a "descriptor" from the hash values for all
+  // lengths up to 430.
+  // Note that "b" is common for the zero-length string.
+  EXPECT_EQ(
+      Hash128TestDescriptor("foo", 430),
+      "bUMA3As8n9I4vNGhThXlEevxZlyMcbb6TYAlIKJ2f5ponsv99q962rYclQ7u3gfnRdCDQ5JI"
+      "2LrGUaCycbXrvLFe4SjgRb9RQwCfrnmNQ7VSEwSKMnkGCK3bDbXSrnIh5qLXdtvIZklbJpGH"
+      "Dqr93BlqF9ubTnOSYkSdx89XvQqflMIW8bjfQp9BPjQejWOeEQspnN1D3sfgVdFhpaQdHYA5"
+      "pI2XcPlCMFPxvrFuRr7joaDvjNe9IUZaunLPMewuXmC3EL95h52Ju3D7y9RNKhgYxMTrA84B"
+      "yJrMvyjdm3vlBxet4EN7v2GEyjbGuaZW9UL6lrX6PghJDg7ACfLGdxNbH3qXM4zaiG2RKnL5"
+      "S3WXKR78RBB5fRFQ8KDIEQjHFvSNsc3GrAEi6W8P2lv8JMTzjBODO2uN4wadVQFT9wpGfV");
+  // Note that "35D2v" is common for "Rocks"
+  EXPECT_EQ(
+      Hash128TestDescriptor("Rocks", 430),
+      "b35D2vzvklFVDqJmyLRXyApwGGO3EAT3swhe8XJAN3mY2UVPglzdmydxcba6JI2tSvwO6zSu"
+      "ANpjSM7tc9G5iMhsa7R8GfyCXRO1TnLg7HvdWNdgGGBirxZR68BgT7TQsYJt6zyEyISeXI1n"
+      "MXA48Xo7dWfJeYN6Z4KWlqZY7TgFXGbks9AX4ehZNSGtIhdO5i58qlgVX1bEejeOVaCcjC79"
+      "67DrMfOKds7rUQzjBa77sMPcoPW1vu6ljGJPZH3XkRyDMZ1twxXKkNxN3tE8nR7JHwyqBAxE"
+      "fTcjbOWrLZ1irWxRSombD8sGDEmclgF11IxqEhe3Rt7gyofO3nExGckKkS9KfRqsCHbiUyva"
+      "JGkJwUHRXaZnh58b4i1Ei9aQKZjXlvIVDixoZrjcNaH5XJIJlRZce9Z9t82wYapTpckYSg");
+  EXPECT_EQ(
+      Hash128TestDescriptor("RocksDB", 430),
+      "b35D2vFUst3XDZCRlSrhmYYakmqImV97LbBsV6EZlOEQpUPH1d1sD3xMKAPlA5UErHehg5O7"
+      "n966fZqhAf3hRc24kGCLfNAWjyUa7vSNOx3IcPoTyVRFZeFlcCtfl7t1QJumHOCpS33EBmBF"
+      "hvK13QjBbDWYWeHQhJhgV9Mqbx17TIcvUkEnYZxb8IzWNmjVsJG44Z7v52DjGj1ZzS62S2Vv"
+      "qWcDO7apvH5VHg68E9Wl6nXP21vlmUqEH9GeWRehfWVvY7mUpsAg5drHHQyDSdiMceiUuUxJ"
+      "XJqHFcDdzbbPk7xDvbLgWCKvH8k3MpQNWOmbSSRDdAP6nGlDjoTToYkcqVREHJzztSWAAq5h"
+      "GHSUNJ6OxsMHhf8EhXfHtKyUzRmPtjYyeckQcGmrQfFFLidc6cjMDKCdBG6c6HVBrS7H2R");
+}
+
 TEST(FastRange32Test, Values) {
   using ROCKSDB_NAMESPACE::FastRange32;
   // Zero range
@@ -376,7 +547,7 @@ TEST(FastRangeGenericTest, Values) {
             uint16_t{6234});
   // Not recommended for typical use because for example this could fail on
   // some platforms and pass on others:
-  //EXPECT_EQ(FastRangeGeneric(static_cast<unsigned long>(0x80000000),
+  // EXPECT_EQ(FastRangeGeneric(static_cast<unsigned long>(0x80000000),
   //                           uint16_t{12468}),
   //          uint16_t{6234});
 }
@@ -394,9 +565,11 @@ size_t FastRange64(uint64_t hash, size_t range) {
 // Tests for math.h / math128.h (not worth a separate test binary)
 using ROCKSDB_NAMESPACE::BitParity;
 using ROCKSDB_NAMESPACE::BitsSetToOne;
+using ROCKSDB_NAMESPACE::ConstexprFloorLog2;
 using ROCKSDB_NAMESPACE::CountTrailingZeroBits;
 using ROCKSDB_NAMESPACE::DecodeFixed128;
 using ROCKSDB_NAMESPACE::DecodeFixedGeneric;
+using ROCKSDB_NAMESPACE::DownwardInvolution;
 using ROCKSDB_NAMESPACE::EncodeFixed128;
 using ROCKSDB_NAMESPACE::EncodeFixedGeneric;
 using ROCKSDB_NAMESPACE::FloorLog2;
@@ -405,6 +578,8 @@ using ROCKSDB_NAMESPACE::Multiply64to128;
 using ROCKSDB_NAMESPACE::Unsigned128;
 using ROCKSDB_NAMESPACE::Upper64of128;
 
+int blah(int x) { return DownwardInvolution(x); }
+
 template <typename T>
 static void test_BitOps() {
   // This complex code is to generalize to 128-bit values. Otherwise
@@ -426,10 +601,13 @@ static void test_BitOps() {
     // FloorLog2
     if (v > 0) {
       EXPECT_EQ(FloorLog2(v), i);
+      EXPECT_EQ(ConstexprFloorLog2(v), i);
     }
     if (vm1 > 0) {
       EXPECT_EQ(FloorLog2(vm1), i - 1);
+      EXPECT_EQ(ConstexprFloorLog2(vm1), i - 1);
       EXPECT_EQ(FloorLog2(everyOtherBit & vm1), (i - 1) & ~1);
+      EXPECT_EQ(ConstexprFloorLog2(everyOtherBit & vm1), (i - 1) & ~1);
     }
 
     // CountTrailingZeroBits
@@ -453,8 +631,89 @@ static void test_BitOps() {
     EXPECT_EQ(BitParity(vm1), i & 1);
     EXPECT_EQ(BitParity(vm1 & everyOtherBit), ((i + 1) / 2) & 1);
 
+    // EndianSwapValue
+    T ev = T{1} << (((sizeof(T) - 1 - (i / 8)) * 8) + i % 8);
+    EXPECT_EQ(EndianSwapValue(v), ev);
+
+    // ReverseBits
+    EXPECT_EQ(ReverseBits(v), static_cast<T>(T{1} << (8 * sizeof(T) - 1 - i)));
+#ifdef HAVE_UINT128_EXTENSION          // Uses multiplication
+    if (std::is_unsigned<T>::value) {  // Technical UB on signed type
+      T rv = T{1} << (8 * sizeof(T) - 1 - i);
+      EXPECT_EQ(ReverseBits(vm1), static_cast<T>(rv * ~T{1}));
+    }
+#endif
+
+    // DownwardInvolution
+    {
+      T misc = static_cast<T>(/*random*/ 0xc682cd153d0e3279U +
+                              i * /*random*/ 0x9b3972f3bea0baa3U);
+      if constexpr (sizeof(T) > 8) {
+        misc = (misc << 64) | (/*random*/ 0x52af031a38ced62dU +
+                               i * /*random*/ 0x936f803d9752ddc3U);
+      }
+      T misc_masked = misc & vm1;
+      EXPECT_LE(misc_masked, vm1);
+      T di_misc_masked = DownwardInvolution(misc_masked);
+      EXPECT_LE(di_misc_masked, vm1);
+      if (misc_masked > 0) {
+        // Highest-order 1 in same position
+        EXPECT_EQ(FloorLog2(misc_masked), FloorLog2(di_misc_masked));
+      }
+      // Validate involution property on short value
+      EXPECT_EQ(DownwardInvolution(di_misc_masked), misc_masked);
+
+      // Validate involution property on large value
+      T di_misc = DownwardInvolution(misc);
+      EXPECT_EQ(DownwardInvolution(di_misc), misc);
+      // Highest-order 1 in same position
+      if (misc > 0) {
+        EXPECT_EQ(FloorLog2(misc), FloorLog2(di_misc));
+      }
+
+      // Validate distributes over xor.
+      // static_casts to avoid numerical promotion effects.
+      EXPECT_EQ(DownwardInvolution(static_cast<T>(misc_masked ^ vm1)),
+                static_cast<T>(di_misc_masked ^ DownwardInvolution(vm1)));
+      T misc2 = static_cast<T>(misc >> 1);
+      EXPECT_EQ(DownwardInvolution(static_cast<T>(misc ^ misc2)),
+                static_cast<T>(di_misc ^ DownwardInvolution(misc2)));
+
+      // Choose some small number of bits to pull off to test combined
+      // uniqueness guarantee
+      int in_bits = i % 7;
+      unsigned in_mask = (unsigned{1} << in_bits) - 1U;
+      // IMPLICIT: int out_bits = 8 - in_bits;
+      std::vector<bool> seen(256, false);
+      for (int j = 0; j < 255; ++j) {
+        T t_in = misc ^ static_cast<T>(j);
+        unsigned in = static_cast<unsigned>(t_in);
+        unsigned out = static_cast<unsigned>(DownwardInvolution(t_in));
+        unsigned val = ((out << in_bits) | (in & in_mask)) & 255U;
+        EXPECT_FALSE(seen[val]);
+        seen[val] = true;
+      }
+
+      if (i + 8 < int{8 * sizeof(T)}) {
+        // Also test manipulating bits in the middle of input is
+        // bijective in bottom of output
+        seen = std::vector<bool>(256, false);
+        for (int j = 0; j < 255; ++j) {
+          T in = misc ^ (static_cast<T>(j) << i);
+          unsigned val = static_cast<unsigned>(DownwardInvolution(in)) & 255U;
+          EXPECT_FALSE(seen[val]);
+          seen[val] = true;
+        }
+      }
+    }
+
     vm1 = (vm1 << 1) | 1;
   }
+
+  EXPECT_EQ(ConstexprFloorLog2(T{1}), 0);
+  EXPECT_EQ(ConstexprFloorLog2(T{2}), 1);
+  EXPECT_EQ(ConstexprFloorLog2(T{3}), 1);
+  EXPECT_EQ(ConstexprFloorLog2(T{42}), 5);
 }
 
 TEST(MathTest, BitOps) {
@@ -584,9 +843,10 @@ TEST(MathTest, CodingGeneric) {
   EXPECT_EQ(std::string("_12"), std::string(out));
 }
 
-int main(int argc, char** argv) {
+int main(int argc, char **argv) {
   fprintf(stderr, "NPHash64 id: %x\n",
           static_cast<int>(ROCKSDB_NAMESPACE::GetSliceNPHash64("RocksDB")));
+  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
   ::testing::InitGoogleTest(&argc, argv);
 
   return RUN_ALL_TESTS();