]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.arrow.memory.util.hash; | |
19 | ||
20 | ||
21 | import org.apache.arrow.memory.ArrowBuf; | |
22 | import org.apache.arrow.memory.util.MemoryUtil; | |
23 | ||
24 | /** | |
25 | * A simple hasher that calculates the hash code of integers as is, | |
26 | * and does not perform any finalization. So the computation is extremely | |
27 | * efficient. | |
28 | * <p> | |
29 | * This algorithm only provides the most basic semantics for the hash code. That is, | |
30 | * if two objects are equal, they must have equal hash code. However, the quality of the | |
31 | * produced hash code may not be good. In other words, the generated hash codes are | |
32 | * far from being uniformly distributed in the universe. | |
33 | * </p> | |
34 | * <p> | |
35 | * Therefore, this algorithm is suitable only for scenarios where the most basic semantics | |
36 | * of the hash code is required (e.g. in scenarios that require fast and proactive data pruning) | |
37 | * </p> | |
38 | * <p> | |
39 | * An object of this class is stateless, so it can be shared between threads. | |
40 | * </p> | |
41 | */ | |
42 | public class SimpleHasher implements ArrowBufHasher { | |
43 | ||
44 | public static SimpleHasher INSTANCE = new SimpleHasher(); | |
45 | ||
46 | protected SimpleHasher() { | |
47 | } | |
48 | ||
49 | /** | |
50 | * Calculates the hash code for a memory region. | |
51 | * @param address start address of the memory region. | |
52 | * @param length length of the memory region. | |
53 | * @return the hash code. | |
54 | */ | |
55 | public int hashCode(long address, long length) { | |
56 | int hashValue = 0; | |
57 | int index = 0; | |
58 | while (index + 8 <= length) { | |
59 | long longValue = MemoryUtil.UNSAFE.getLong(address + index); | |
60 | int longHash = getLongHashCode(longValue); | |
61 | hashValue = combineHashCode(hashValue, longHash); | |
62 | index += 8; | |
63 | } | |
64 | ||
65 | if (index + 4 <= length) { | |
66 | int intValue = MemoryUtil.UNSAFE.getInt(address + index); | |
67 | int intHash = intValue; | |
68 | hashValue = combineHashCode(hashValue, intHash); | |
69 | index += 4; | |
70 | } | |
71 | ||
72 | while (index < length) { | |
73 | byte byteValue = MemoryUtil.UNSAFE.getByte(address + index); | |
74 | int byteHash = byteValue; | |
75 | hashValue = combineHashCode(hashValue, byteHash); | |
76 | index += 1; | |
77 | } | |
78 | ||
79 | return finalizeHashCode(hashValue); | |
80 | } | |
81 | ||
82 | /** | |
83 | * Calculates the hash code for a memory region. | |
84 | * @param buf the buffer for the memory region. | |
85 | * @param offset offset within the buffer for the memory region. | |
86 | * @param length length of the memory region. | |
87 | * @return the hash code. | |
88 | */ | |
89 | @Override | |
90 | public int hashCode(ArrowBuf buf, long offset, long length) { | |
91 | buf.checkBytes(offset, offset + length); | |
92 | return hashCode(buf.memoryAddress() + offset, length); | |
93 | } | |
94 | ||
95 | protected int combineHashCode(int currentHashCode, int newHashCode) { | |
96 | return currentHashCode * 37 + newHashCode; | |
97 | } | |
98 | ||
99 | protected int getLongHashCode(long longValue) { | |
100 | return Long.hashCode(longValue); | |
101 | } | |
102 | ||
103 | protected int finalizeHashCode(int hashCode) { | |
104 | return hashCode; | |
105 | } | |
106 | ||
107 | @Override | |
108 | public int hashCode() { | |
109 | return 123; | |
110 | } | |
111 | ||
112 | @Override | |
113 | public boolean equals(Object obj) { | |
114 | return obj != null && (obj instanceof SimpleHasher); | |
115 | } | |
116 | } |