--- /dev/null
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern "C" {
+
+#include <string.h>
+
+#include "./types.h"
+
+static inline gdv_uint64 rotate_left(gdv_uint64 val, int distance) {
+ return (val << distance) | (val >> (64 - distance));
+}
+
+//
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain.
+// See http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp
+// MurmurHash3_x64_128
+//
+static inline gdv_uint64 fmix64(gdv_uint64 k) {
+ k ^= k >> 33;
+ k *= 0xff51afd7ed558ccduLL;
+ k ^= k >> 33;
+ k *= 0xc4ceb9fe1a85ec53uLL;
+ k ^= k >> 33;
+ return k;
+}
+
+static inline gdv_uint64 murmur3_64(gdv_uint64 val, gdv_int32 seed) {
+ gdv_uint64 h1 = seed;
+ gdv_uint64 h2 = seed;
+
+ gdv_uint64 c1 = 0x87c37b91114253d5ull;
+ gdv_uint64 c2 = 0x4cf5ad432745937full;
+
+ int length = 8;
+ gdv_uint64 k1 = 0;
+
+ k1 = val;
+ k1 *= c1;
+ k1 = rotate_left(k1, 31);
+ k1 *= c2;
+ h1 ^= k1;
+
+ h1 ^= length;
+ h2 ^= length;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix64(h1);
+ h2 = fmix64(h2);
+
+ h1 += h2;
+
+ // h2 += h1;
+ // murmur3_128 should return 128 bit (h1,h2), now we return only 64bits,
+ return h1;
+}
+
+static inline gdv_uint32 murmur3_32(gdv_uint64 val, gdv_int32 seed) {
+ gdv_uint64 c1 = 0xcc9e2d51ull;
+ gdv_uint64 c2 = 0x1b873593ull;
+ int length = 8;
+ static gdv_uint64 UINT_MASK = 0xffffffffull;
+ gdv_uint64 lh1 = seed & UINT_MASK;
+ for (int i = 0; i < 2; i++) {
+ gdv_uint64 lk1 = ((val >> i * 32) & UINT_MASK);
+ lk1 *= c1;
+ lk1 &= UINT_MASK;
+
+ lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17);
+
+ lk1 *= c2;
+ lk1 &= UINT_MASK;
+
+ lh1 ^= lk1;
+ lh1 = ((lh1 << 13) & UINT_MASK) | (lh1 >> 19);
+
+ lh1 = lh1 * 5 + 0xe6546b64L;
+ lh1 = UINT_MASK & lh1;
+ }
+ lh1 ^= length;
+
+ lh1 ^= lh1 >> 16;
+ lh1 *= 0x85ebca6bull;
+ lh1 = UINT_MASK & lh1;
+ lh1 ^= lh1 >> 13;
+ lh1 *= 0xc2b2ae35ull;
+ lh1 = UINT_MASK & lh1;
+ lh1 ^= lh1 >> 16;
+
+ return static_cast<gdv_uint32>(lh1);
+}
+
+static inline gdv_uint64 double_to_long_bits(double value) {
+ gdv_uint64 result;
+ memcpy(&result, &value, sizeof(result));
+ return result;
+}
+
+FORCE_INLINE gdv_int64 hash64(double val, gdv_int64 seed) {
+ return murmur3_64(double_to_long_bits(val), static_cast<gdv_int32>(seed));
+}
+
+FORCE_INLINE gdv_int32 hash32(double val, gdv_int32 seed) {
+ return murmur3_32(double_to_long_bits(val), seed);
+}
+
+// Wrappers for all the numeric/data/time arrow types
+
+#define HASH64_WITH_SEED_OP(NAME, TYPE) \
+ FORCE_INLINE \
+ gdv_int64 NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid, gdv_int64 seed, \
+ gdv_boolean seed_isvalid) { \
+ if (!is_valid) { \
+ return seed; \
+ } \
+ return hash64(static_cast<double>(in), seed); \
+ }
+
+#define HASH32_WITH_SEED_OP(NAME, TYPE) \
+ FORCE_INLINE \
+ gdv_int32 NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid, gdv_int32 seed, \
+ gdv_boolean seed_isvalid) { \
+ if (!is_valid) { \
+ return seed; \
+ } \
+ return hash32(static_cast<double>(in), seed); \
+ }
+
+#define HASH64_OP(NAME, TYPE) \
+ FORCE_INLINE \
+ gdv_int64 NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid) { \
+ return is_valid ? hash64(static_cast<double>(in), 0) : 0; \
+ }
+
+#define HASH32_OP(NAME, TYPE) \
+ FORCE_INLINE \
+ gdv_int32 NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid) { \
+ return is_valid ? hash32(static_cast<double>(in), 0) : 0; \
+ }
+
+// Expand inner macro for all numeric types.
+#define NUMERIC_BOOL_DATE_TYPES(INNER, NAME) \
+ INNER(NAME, int8) \
+ INNER(NAME, int16) \
+ INNER(NAME, int32) \
+ INNER(NAME, int64) \
+ INNER(NAME, uint8) \
+ INNER(NAME, uint16) \
+ INNER(NAME, uint32) \
+ INNER(NAME, uint64) \
+ INNER(NAME, float32) \
+ INNER(NAME, float64) \
+ INNER(NAME, boolean) \
+ INNER(NAME, date64) \
+ INNER(NAME, date32) \
+ INNER(NAME, time32) \
+ INNER(NAME, timestamp)
+
+NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash)
+NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash32)
+NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash32AsDouble)
+NUMERIC_BOOL_DATE_TYPES(HASH32_WITH_SEED_OP, hash32WithSeed)
+NUMERIC_BOOL_DATE_TYPES(HASH32_WITH_SEED_OP, hash32AsDoubleWithSeed)
+
+NUMERIC_BOOL_DATE_TYPES(HASH64_OP, hash64)
+NUMERIC_BOOL_DATE_TYPES(HASH64_OP, hash64AsDouble)
+NUMERIC_BOOL_DATE_TYPES(HASH64_WITH_SEED_OP, hash64WithSeed)
+NUMERIC_BOOL_DATE_TYPES(HASH64_WITH_SEED_OP, hash64AsDoubleWithSeed)
+
+#undef NUMERIC_BOOL_DATE_TYPES
+
+static inline gdv_uint64 murmur3_64_buf(const gdv_uint8* key, gdv_int32 len,
+ gdv_int32 seed) {
+ gdv_uint64 h1 = seed;
+ gdv_uint64 h2 = seed;
+ gdv_uint64 c1 = 0x87c37b91114253d5ull;
+ gdv_uint64 c2 = 0x4cf5ad432745937full;
+
+ const gdv_uint64* blocks = reinterpret_cast<const gdv_uint64*>(key);
+ int nblocks = len / 16;
+ for (int i = 0; i < nblocks; i++) {
+ gdv_uint64 k1 = blocks[i * 2 + 0];
+ gdv_uint64 k2 = blocks[i * 2 + 1];
+
+ k1 *= c1;
+ k1 = rotate_left(k1, 31);
+ k1 *= c2;
+ h1 ^= k1;
+ h1 = rotate_left(h1, 27);
+ h1 += h2;
+ h1 = h1 * 5 + 0x52dce729;
+ k2 *= c2;
+ k2 = rotate_left(k2, 33);
+ k2 *= c1;
+ h2 ^= k2;
+ h2 = rotate_left(h2, 31);
+ h2 += h1;
+ h2 = h2 * 5 + 0x38495ab5;
+ }
+
+ // tail
+ gdv_uint64 k1 = 0;
+ gdv_uint64 k2 = 0;
+
+ const gdv_uint8* tail = reinterpret_cast<const gdv_uint8*>(key + nblocks * 16);
+ switch (len & 15) {
+ case 15:
+ k2 = static_cast<gdv_uint64>(tail[14]) << 48;
+ case 14:
+ k2 ^= static_cast<gdv_uint64>(tail[13]) << 40;
+ case 13:
+ k2 ^= static_cast<gdv_uint64>(tail[12]) << 32;
+ case 12:
+ k2 ^= static_cast<gdv_uint64>(tail[11]) << 24;
+ case 11:
+ k2 ^= static_cast<gdv_uint64>(tail[10]) << 16;
+ case 10:
+ k2 ^= static_cast<gdv_uint64>(tail[9]) << 8;
+ case 9:
+ k2 ^= static_cast<gdv_uint64>(tail[8]);
+ k2 *= c2;
+ k2 = rotate_left(k2, 33);
+ k2 *= c1;
+ h2 ^= k2;
+ case 8:
+ k1 ^= static_cast<gdv_uint64>(tail[7]) << 56;
+ case 7:
+ k1 ^= static_cast<gdv_uint64>(tail[6]) << 48;
+ case 6:
+ k1 ^= static_cast<gdv_uint64>(tail[5]) << 40;
+ case 5:
+ k1 ^= static_cast<gdv_uint64>(tail[4]) << 32;
+ case 4:
+ k1 ^= static_cast<gdv_uint64>(tail[3]) << 24;
+ case 3:
+ k1 ^= static_cast<gdv_uint64>(tail[2]) << 16;
+ case 2:
+ k1 ^= static_cast<gdv_uint64>(tail[1]) << 8;
+ case 1:
+ k1 ^= static_cast<gdv_uint64>(tail[0]) << 0;
+ k1 *= c1;
+ k1 = rotate_left(k1, 31);
+ k1 *= c2;
+ h1 ^= k1;
+ }
+
+ h1 ^= len;
+ h2 ^= len;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix64(h1);
+ h2 = fmix64(h2);
+
+ h1 += h2;
+ // h2 += h1;
+ // returning 64-bits of the 128-bit hash.
+ return h1;
+}
+
+static gdv_uint32 murmur3_32_buf(const gdv_uint8* key, gdv_int32 len, gdv_int32 seed) {
+ static const gdv_uint64 c1 = 0xcc9e2d51ull;
+ static const gdv_uint64 c2 = 0x1b873593ull;
+ static const gdv_uint64 UINT_MASK = 0xffffffffull;
+ gdv_uint64 lh1 = seed;
+ const gdv_uint32* blocks = reinterpret_cast<const gdv_uint32*>(key);
+ int nblocks = len / 4;
+ const gdv_uint8* tail = reinterpret_cast<const gdv_uint8*>(key + nblocks * 4);
+ for (int i = 0; i < nblocks; i++) {
+ gdv_uint64 lk1 = static_cast<gdv_uint64>(blocks[i]);
+
+ // k1 *= c1;
+ lk1 *= c1;
+ lk1 &= UINT_MASK;
+
+ lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17);
+
+ lk1 *= c2;
+ lk1 = lk1 & UINT_MASK;
+ lh1 ^= lk1;
+ lh1 = ((lh1 << 13) & UINT_MASK) | (lh1 >> 19);
+
+ lh1 = lh1 * 5 + 0xe6546b64ull;
+ lh1 = UINT_MASK & lh1;
+ }
+
+ // tail
+ gdv_uint64 lk1 = 0;
+
+ switch (len & 3) {
+ case 3:
+ lk1 = (tail[2] & 0xff) << 16;
+ case 2:
+ lk1 |= (tail[1] & 0xff) << 8;
+ case 1:
+ lk1 |= (tail[0] & 0xff);
+ lk1 *= c1;
+ lk1 = UINT_MASK & lk1;
+ lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17);
+
+ lk1 *= c2;
+ lk1 = lk1 & UINT_MASK;
+
+ lh1 ^= lk1;
+ }
+
+ // finalization
+ lh1 ^= len;
+
+ lh1 ^= lh1 >> 16;
+ lh1 *= 0x85ebca6b;
+ lh1 = UINT_MASK & lh1;
+ lh1 ^= lh1 >> 13;
+
+ lh1 *= 0xc2b2ae35;
+ lh1 = UINT_MASK & lh1;
+ lh1 ^= lh1 >> 16;
+
+ return static_cast<gdv_uint32>(lh1 & UINT_MASK);
+}
+
+FORCE_INLINE gdv_int64 hash64_buf(const gdv_uint8* buf, int len, gdv_int64 seed) {
+ return murmur3_64_buf(buf, len, static_cast<gdv_int32>(seed));
+}
+
+FORCE_INLINE gdv_int32 hash32_buf(const gdv_uint8* buf, int len, gdv_int32 seed) {
+ return murmur3_32_buf(buf, len, seed);
+}
+
+// Wrappers for the varlen types
+
+#define HASH64_BUF_WITH_SEED_OP(NAME, TYPE) \
+ FORCE_INLINE \
+ gdv_int64 NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid, \
+ gdv_int64 seed, gdv_boolean seed_isvalid) { \
+ if (!is_valid) { \
+ return seed; \
+ } \
+ return hash64_buf(reinterpret_cast<const uint8_t*>(in), len, seed); \
+ }
+
+#define HASH32_BUF_WITH_SEED_OP(NAME, TYPE) \
+ FORCE_INLINE \
+ gdv_int32 NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid, \
+ gdv_int32 seed, gdv_boolean seed_isvalid) { \
+ if (!is_valid) { \
+ return seed; \
+ } \
+ return hash32_buf(reinterpret_cast<const uint8_t*>(in), len, seed); \
+ }
+
+#define HASH64_BUF_OP(NAME, TYPE) \
+ FORCE_INLINE \
+ gdv_int64 NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \
+ return is_valid ? hash64_buf(reinterpret_cast<const uint8_t*>(in), len, 0) : 0; \
+ }
+
+#define HASH32_BUF_OP(NAME, TYPE) \
+ FORCE_INLINE \
+ gdv_int32 NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \
+ return is_valid ? hash32_buf(reinterpret_cast<const uint8_t*>(in), len, 0) : 0; \
+ }
+
+// Expand inner macro for all non-numeric types.
+#define VAR_LEN_TYPES(INNER, NAME) \
+ INNER(NAME, utf8) \
+ INNER(NAME, binary)
+
+VAR_LEN_TYPES(HASH32_BUF_OP, hash)
+VAR_LEN_TYPES(HASH32_BUF_OP, hash32)
+VAR_LEN_TYPES(HASH32_BUF_OP, hash32AsDouble)
+VAR_LEN_TYPES(HASH32_BUF_WITH_SEED_OP, hash32WithSeed)
+VAR_LEN_TYPES(HASH32_BUF_WITH_SEED_OP, hash32AsDoubleWithSeed)
+
+VAR_LEN_TYPES(HASH64_BUF_OP, hash64)
+VAR_LEN_TYPES(HASH64_BUF_OP, hash64AsDouble)
+VAR_LEN_TYPES(HASH64_BUF_WITH_SEED_OP, hash64WithSeed)
+VAR_LEN_TYPES(HASH64_BUF_WITH_SEED_OP, hash64AsDoubleWithSeed)
+
+#undef HASH32_BUF_OP
+#undef HASH32_BUF_WITH_SEED_OP
+#undef HASH32_OP
+#undef HASH32_WITH_SEED_OP
+#undef HASH64_BUF_OP
+#undef HASH64_BUF_WITH_SEED_OP
+#undef HASH64_OP
+#undef HASH64_WITH_SEED_OP
+
+} // extern "C"