#include "util/hash.h"
#include <cstring>
+#include <type_traits>
#include <vector>
#include "test_util/testharness.h"
#include "util/coding.h"
+#include "util/coding_lean.h"
+#include "util/hash128.h"
+#include "util/math.h"
#include "util/math128.h"
+using ROCKSDB_NAMESPACE::BijectiveHash2x64;
+using ROCKSDB_NAMESPACE::BijectiveUnhash2x64;
+using ROCKSDB_NAMESPACE::DecodeFixed64;
using ROCKSDB_NAMESPACE::EncodeFixed32;
+using ROCKSDB_NAMESPACE::EndianSwapValue;
using ROCKSDB_NAMESPACE::GetSliceHash64;
using ROCKSDB_NAMESPACE::Hash;
+using ROCKSDB_NAMESPACE::Hash128;
+using ROCKSDB_NAMESPACE::Hash2x64;
using ROCKSDB_NAMESPACE::Hash64;
using ROCKSDB_NAMESPACE::Lower32of64;
+using ROCKSDB_NAMESPACE::Lower64of128;
+using ROCKSDB_NAMESPACE::ReverseBits;
using ROCKSDB_NAMESPACE::Slice;
+using ROCKSDB_NAMESPACE::Unsigned128;
using ROCKSDB_NAMESPACE::Upper32of64;
+using ROCKSDB_NAMESPACE::Upper64of128;
// The hash algorithm is part of the file format, for example for the Bloom
// filters. Test that the hash values are stable for a set of random strings of
for (size_t size = 0; size <= max_size; ++size) {
uint64_t here = Hash64(str.data(), size, kSeed);
- // Must be same as GetSliceHash64
+ // Must be same as unseeded Hash64 and GetSliceHash64
+ EXPECT_EQ(here, Hash64(str.data(), size));
EXPECT_EQ(here, GetSliceHash64(Slice(str.data(), size)));
// Upper and Lower must reconstruct hash
return rv;
}
-// XXH3p changes its algorithm for various sizes up through 250 bytes, so
+// XXPH3 changes its algorithm for various sizes up through 250 bytes, so
// we need to check the stability of larger sizes also.
TEST(HashTest, Hash64LargeValueSchema) {
// Each of these derives a "descriptor" from the hash values for all
"eMFlxCIYUpTCsal2qsmnGOWa8WCcefrohMjDj1fjzSvSaQwlpyR1GZHF2uPOoQagiCpHpm");
}
+TEST(HashTest, Hash128Misc) {
+ constexpr uint32_t kSeed = 0; // Same as GetSliceHash128
+
+ for (char fill : {'\0', 'a', '1', '\xff', 'e'}) {
+ const size_t max_size = 1000;
+ std::string str(max_size, fill);
+
+ if (fill == 'e') {
+ // Use different characters to check endianness handling
+ for (size_t i = 0; i < str.size(); ++i) {
+ str[i] += static_cast<char>(i);
+ }
+ }
+
+ for (size_t size = 0; size <= max_size; ++size) {
+ Unsigned128 here = Hash128(str.data(), size, kSeed);
+
+ // Must be same as unseeded Hash128 and GetSliceHash128
+ EXPECT_EQ(here, Hash128(str.data(), size));
+ EXPECT_EQ(here, GetSliceHash128(Slice(str.data(), size)));
+ {
+ uint64_t hi, lo;
+ Hash2x64(str.data(), size, &hi, &lo);
+ EXPECT_EQ(Lower64of128(here), lo);
+ EXPECT_EQ(Upper64of128(here), hi);
+ }
+ if (size == 16) {
+ const uint64_t in_hi = DecodeFixed64(str.data() + 8);
+ const uint64_t in_lo = DecodeFixed64(str.data());
+ uint64_t hi, lo;
+ BijectiveHash2x64(in_hi, in_lo, &hi, &lo);
+ EXPECT_EQ(Lower64of128(here), lo);
+ EXPECT_EQ(Upper64of128(here), hi);
+ uint64_t un_hi, un_lo;
+ BijectiveUnhash2x64(hi, lo, &un_hi, &un_lo);
+ EXPECT_EQ(in_lo, un_lo);
+ EXPECT_EQ(in_hi, un_hi);
+ }
+
+ // Upper and Lower must reconstruct hash
+ EXPECT_EQ(here,
+ (Unsigned128{Upper64of128(here)} << 64) | Lower64of128(here));
+ EXPECT_EQ(here,
+ (Unsigned128{Upper64of128(here)} << 64) ^ Lower64of128(here));
+
+ // Seed changes hash value (with high probability)
+ for (uint64_t var_seed = 1; var_seed != 0; var_seed <<= 1) {
+ Unsigned128 seeded = Hash128(str.data(), size, var_seed);
+ EXPECT_NE(here, seeded);
+ // Must match seeded Hash2x64
+ {
+ uint64_t hi, lo;
+ Hash2x64(str.data(), size, var_seed, &hi, &lo);
+ EXPECT_EQ(Lower64of128(seeded), lo);
+ EXPECT_EQ(Upper64of128(seeded), hi);
+ }
+ if (size == 16) {
+ const uint64_t in_hi = DecodeFixed64(str.data() + 8);
+ const uint64_t in_lo = DecodeFixed64(str.data());
+ uint64_t hi, lo;
+ BijectiveHash2x64(in_hi, in_lo, var_seed, &hi, &lo);
+ EXPECT_EQ(Lower64of128(seeded), lo);
+ EXPECT_EQ(Upper64of128(seeded), hi);
+ uint64_t un_hi, un_lo;
+ BijectiveUnhash2x64(hi, lo, var_seed, &un_hi, &un_lo);
+ EXPECT_EQ(in_lo, un_lo);
+ EXPECT_EQ(in_hi, un_hi);
+ }
+ }
+
+ // Size changes hash value (with high probability)
+ size_t max_smaller_by = std::min(size_t{30}, size);
+ for (size_t smaller_by = 1; smaller_by <= max_smaller_by; ++smaller_by) {
+ EXPECT_NE(here, Hash128(str.data(), size - smaller_by, kSeed));
+ }
+ }
+ }
+}
+
+// Test that hash values are "non-trivial" for "trivial" inputs
+TEST(HashTest, Hash128Trivial) {
+ // Thorough test too slow for regression testing
+ constexpr bool thorough = false;
+
+ // For various seeds, make sure hash of empty string is not zero.
+ constexpr uint64_t max_seed = thorough ? 0x1000000 : 0x10000;
+ for (uint64_t seed = 0; seed < max_seed; ++seed) {
+ Unsigned128 here = Hash128("", 0, seed);
+ EXPECT_NE(Lower64of128(here), 0u);
+ EXPECT_NE(Upper64of128(here), 0u);
+ }
+
+ // For standard seed, make sure hash of small strings are not zero
+ constexpr uint32_t kSeed = 0; // Same as GetSliceHash128
+ char input[4];
+ constexpr int max_len = thorough ? 3 : 2;
+ for (int len = 1; len <= max_len; ++len) {
+ for (uint32_t i = 0; (i >> (len * 8)) == 0; ++i) {
+ EncodeFixed32(input, i);
+ Unsigned128 here = Hash128(input, len, kSeed);
+ EXPECT_NE(Lower64of128(here), 0u);
+ EXPECT_NE(Upper64of128(here), 0u);
+ }
+ }
+}
+
+std::string Hash128TestDescriptor(const char *repeat, size_t limit) {
+ const char *mod61_encode =
+ "abcdefghijklmnopqrstuvwxyz123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+ std::string input;
+ while (input.size() < limit) {
+ input.append(repeat);
+ }
+ std::string rv;
+ for (size_t i = 0; i < limit; ++i) {
+ auto h = GetSliceHash128(Slice(input.data(), i));
+ uint64_t h2 = Upper64of128(h) + Lower64of128(h);
+ rv.append(1, mod61_encode[static_cast<size_t>(h2 % 61)]);
+ }
+ return rv;
+}
+
+// XXH3 changes its algorithm for various sizes up through 250 bytes, so
+// we need to check the stability of larger sizes also.
+TEST(HashTest, Hash128ValueSchema) {
+ // Each of these derives a "descriptor" from the hash values for all
+ // lengths up to 430.
+ // Note that "b" is common for the zero-length string.
+ EXPECT_EQ(
+ Hash128TestDescriptor("foo", 430),
+ "bUMA3As8n9I4vNGhThXlEevxZlyMcbb6TYAlIKJ2f5ponsv99q962rYclQ7u3gfnRdCDQ5JI"
+ "2LrGUaCycbXrvLFe4SjgRb9RQwCfrnmNQ7VSEwSKMnkGCK3bDbXSrnIh5qLXdtvIZklbJpGH"
+ "Dqr93BlqF9ubTnOSYkSdx89XvQqflMIW8bjfQp9BPjQejWOeEQspnN1D3sfgVdFhpaQdHYA5"
+ "pI2XcPlCMFPxvrFuRr7joaDvjNe9IUZaunLPMewuXmC3EL95h52Ju3D7y9RNKhgYxMTrA84B"
+ "yJrMvyjdm3vlBxet4EN7v2GEyjbGuaZW9UL6lrX6PghJDg7ACfLGdxNbH3qXM4zaiG2RKnL5"
+ "S3WXKR78RBB5fRFQ8KDIEQjHFvSNsc3GrAEi6W8P2lv8JMTzjBODO2uN4wadVQFT9wpGfV");
+ // Note that "35D2v" is common for "Rocks"
+ EXPECT_EQ(
+ Hash128TestDescriptor("Rocks", 430),
+ "b35D2vzvklFVDqJmyLRXyApwGGO3EAT3swhe8XJAN3mY2UVPglzdmydxcba6JI2tSvwO6zSu"
+ "ANpjSM7tc9G5iMhsa7R8GfyCXRO1TnLg7HvdWNdgGGBirxZR68BgT7TQsYJt6zyEyISeXI1n"
+ "MXA48Xo7dWfJeYN6Z4KWlqZY7TgFXGbks9AX4ehZNSGtIhdO5i58qlgVX1bEejeOVaCcjC79"
+ "67DrMfOKds7rUQzjBa77sMPcoPW1vu6ljGJPZH3XkRyDMZ1twxXKkNxN3tE8nR7JHwyqBAxE"
+ "fTcjbOWrLZ1irWxRSombD8sGDEmclgF11IxqEhe3Rt7gyofO3nExGckKkS9KfRqsCHbiUyva"
+ "JGkJwUHRXaZnh58b4i1Ei9aQKZjXlvIVDixoZrjcNaH5XJIJlRZce9Z9t82wYapTpckYSg");
+ EXPECT_EQ(
+ Hash128TestDescriptor("RocksDB", 430),
+ "b35D2vFUst3XDZCRlSrhmYYakmqImV97LbBsV6EZlOEQpUPH1d1sD3xMKAPlA5UErHehg5O7"
+ "n966fZqhAf3hRc24kGCLfNAWjyUa7vSNOx3IcPoTyVRFZeFlcCtfl7t1QJumHOCpS33EBmBF"
+ "hvK13QjBbDWYWeHQhJhgV9Mqbx17TIcvUkEnYZxb8IzWNmjVsJG44Z7v52DjGj1ZzS62S2Vv"
+ "qWcDO7apvH5VHg68E9Wl6nXP21vlmUqEH9GeWRehfWVvY7mUpsAg5drHHQyDSdiMceiUuUxJ"
+ "XJqHFcDdzbbPk7xDvbLgWCKvH8k3MpQNWOmbSSRDdAP6nGlDjoTToYkcqVREHJzztSWAAq5h"
+ "GHSUNJ6OxsMHhf8EhXfHtKyUzRmPtjYyeckQcGmrQfFFLidc6cjMDKCdBG6c6HVBrS7H2R");
+}
+
TEST(FastRange32Test, Values) {
using ROCKSDB_NAMESPACE::FastRange32;
// Zero range
uint16_t{6234});
// Not recommended for typical use because for example this could fail on
// some platforms and pass on others:
- //EXPECT_EQ(FastRangeGeneric(static_cast<unsigned long>(0x80000000),
+ // EXPECT_EQ(FastRangeGeneric(static_cast<unsigned long>(0x80000000),
// uint16_t{12468}),
// uint16_t{6234});
}
// Tests for math.h / math128.h (not worth a separate test binary)
using ROCKSDB_NAMESPACE::BitParity;
using ROCKSDB_NAMESPACE::BitsSetToOne;
+using ROCKSDB_NAMESPACE::ConstexprFloorLog2;
using ROCKSDB_NAMESPACE::CountTrailingZeroBits;
using ROCKSDB_NAMESPACE::DecodeFixed128;
using ROCKSDB_NAMESPACE::DecodeFixedGeneric;
+using ROCKSDB_NAMESPACE::DownwardInvolution;
using ROCKSDB_NAMESPACE::EncodeFixed128;
using ROCKSDB_NAMESPACE::EncodeFixedGeneric;
using ROCKSDB_NAMESPACE::FloorLog2;
using ROCKSDB_NAMESPACE::Unsigned128;
using ROCKSDB_NAMESPACE::Upper64of128;
+int blah(int x) { return DownwardInvolution(x); }
+
template <typename T>
static void test_BitOps() {
// This complex code is to generalize to 128-bit values. Otherwise
// FloorLog2
if (v > 0) {
EXPECT_EQ(FloorLog2(v), i);
+ EXPECT_EQ(ConstexprFloorLog2(v), i);
}
if (vm1 > 0) {
EXPECT_EQ(FloorLog2(vm1), i - 1);
+ EXPECT_EQ(ConstexprFloorLog2(vm1), i - 1);
EXPECT_EQ(FloorLog2(everyOtherBit & vm1), (i - 1) & ~1);
+ EXPECT_EQ(ConstexprFloorLog2(everyOtherBit & vm1), (i - 1) & ~1);
}
// CountTrailingZeroBits
EXPECT_EQ(BitParity(vm1), i & 1);
EXPECT_EQ(BitParity(vm1 & everyOtherBit), ((i + 1) / 2) & 1);
+ // EndianSwapValue
+ T ev = T{1} << (((sizeof(T) - 1 - (i / 8)) * 8) + i % 8);
+ EXPECT_EQ(EndianSwapValue(v), ev);
+
+ // ReverseBits
+ EXPECT_EQ(ReverseBits(v), static_cast<T>(T{1} << (8 * sizeof(T) - 1 - i)));
+#ifdef HAVE_UINT128_EXTENSION // Uses multiplication
+ if (std::is_unsigned<T>::value) { // Technical UB on signed type
+ T rv = T{1} << (8 * sizeof(T) - 1 - i);
+ EXPECT_EQ(ReverseBits(vm1), static_cast<T>(rv * ~T{1}));
+ }
+#endif
+
+ // DownwardInvolution
+ {
+ T misc = static_cast<T>(/*random*/ 0xc682cd153d0e3279U +
+ i * /*random*/ 0x9b3972f3bea0baa3U);
+ if constexpr (sizeof(T) > 8) {
+ misc = (misc << 64) | (/*random*/ 0x52af031a38ced62dU +
+ i * /*random*/ 0x936f803d9752ddc3U);
+ }
+ T misc_masked = misc & vm1;
+ EXPECT_LE(misc_masked, vm1);
+ T di_misc_masked = DownwardInvolution(misc_masked);
+ EXPECT_LE(di_misc_masked, vm1);
+ if (misc_masked > 0) {
+ // Highest-order 1 in same position
+ EXPECT_EQ(FloorLog2(misc_masked), FloorLog2(di_misc_masked));
+ }
+ // Validate involution property on short value
+ EXPECT_EQ(DownwardInvolution(di_misc_masked), misc_masked);
+
+ // Validate involution property on large value
+ T di_misc = DownwardInvolution(misc);
+ EXPECT_EQ(DownwardInvolution(di_misc), misc);
+ // Highest-order 1 in same position
+ if (misc > 0) {
+ EXPECT_EQ(FloorLog2(misc), FloorLog2(di_misc));
+ }
+
+ // Validate distributes over xor.
+ // static_casts to avoid numerical promotion effects.
+ EXPECT_EQ(DownwardInvolution(static_cast<T>(misc_masked ^ vm1)),
+ static_cast<T>(di_misc_masked ^ DownwardInvolution(vm1)));
+ T misc2 = static_cast<T>(misc >> 1);
+ EXPECT_EQ(DownwardInvolution(static_cast<T>(misc ^ misc2)),
+ static_cast<T>(di_misc ^ DownwardInvolution(misc2)));
+
+ // Choose some small number of bits to pull off to test combined
+ // uniqueness guarantee
+ int in_bits = i % 7;
+ unsigned in_mask = (unsigned{1} << in_bits) - 1U;
+ // IMPLICIT: int out_bits = 8 - in_bits;
+ std::vector<bool> seen(256, false);
+ for (int j = 0; j < 255; ++j) {
+ T t_in = misc ^ static_cast<T>(j);
+ unsigned in = static_cast<unsigned>(t_in);
+ unsigned out = static_cast<unsigned>(DownwardInvolution(t_in));
+ unsigned val = ((out << in_bits) | (in & in_mask)) & 255U;
+ EXPECT_FALSE(seen[val]);
+ seen[val] = true;
+ }
+
+ if (i + 8 < int{8 * sizeof(T)}) {
+ // Also test manipulating bits in the middle of input is
+ // bijective in bottom of output
+ seen = std::vector<bool>(256, false);
+ for (int j = 0; j < 255; ++j) {
+ T in = misc ^ (static_cast<T>(j) << i);
+ unsigned val = static_cast<unsigned>(DownwardInvolution(in)) & 255U;
+ EXPECT_FALSE(seen[val]);
+ seen[val] = true;
+ }
+ }
+ }
+
vm1 = (vm1 << 1) | 1;
}
+
+ EXPECT_EQ(ConstexprFloorLog2(T{1}), 0);
+ EXPECT_EQ(ConstexprFloorLog2(T{2}), 1);
+ EXPECT_EQ(ConstexprFloorLog2(T{3}), 1);
+ EXPECT_EQ(ConstexprFloorLog2(T{42}), 5);
}
TEST(MathTest, BitOps) {
EXPECT_EQ(std::string("_12"), std::string(out));
}
-int main(int argc, char** argv) {
+int main(int argc, char **argv) {
fprintf(stderr, "NPHash64 id: %x\n",
static_cast<int>(ROCKSDB_NAMESPACE::GetSliceNPHash64("RocksDB")));
+ ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();