]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
9879b94f | 2 | * Copyright (c) 2009, 2012 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include <inttypes.h> | |
19 | #include <stdio.h> | |
20 | #include <stdlib.h> | |
21 | #include <string.h> | |
22 | #include "hash.h" | |
c49d1dd1 | 23 | #include "jhash.h" |
064af421 BP |
24 | |
25 | #undef NDEBUG | |
26 | #include <assert.h> | |
27 | ||
28 | static void | |
29 | set_bit(uint32_t array[3], int bit) | |
30 | { | |
31 | assert(bit >= 0 && bit <= 96); | |
32 | memset(array, 0, sizeof(uint32_t) * 3); | |
33 | if (bit < 96) { | |
34 | array[bit / 32] = UINT32_C(1) << (bit % 32); | |
35 | } | |
36 | } | |
37 | ||
38 | static uint32_t | |
39 | hash_words_cb(uint32_t input) | |
40 | { | |
41 | return hash_words(&input, 1, 0); | |
42 | } | |
43 | ||
9879b94f | 44 | static uint32_t |
c49d1dd1 | 45 | jhash_words_cb(uint32_t input) |
9879b94f | 46 | { |
c49d1dd1 | 47 | return jhash_words(&input, 1, 0); |
9879b94f BP |
48 | } |
49 | ||
064af421 BP |
50 | static uint32_t |
51 | hash_int_cb(uint32_t input) | |
52 | { | |
53 | return hash_int(input, 0); | |
54 | } | |
55 | ||
56 | static void | |
57 | check_word_hash(uint32_t (*hash)(uint32_t), const char *name, | |
58 | int min_unique) | |
59 | { | |
60 | int i, j; | |
61 | ||
62 | for (i = 0; i <= 32; i++) { | |
63 | uint32_t in1 = i < 32 ? UINT32_C(1) << i : 0; | |
64 | for (j = i + 1; j <= 32; j++) { | |
65 | uint32_t in2 = j < 32 ? UINT32_C(1) << j : 0; | |
66 | uint32_t out1 = hash(in1); | |
67 | uint32_t out2 = hash(in2); | |
68 | const uint32_t unique_mask = (UINT32_C(1) << min_unique) - 1; | |
69 | int ofs; | |
70 | for (ofs = 0; ofs < 32 - min_unique; ofs++) { | |
71 | uint32_t bits1 = (out1 >> ofs) & unique_mask; | |
72 | uint32_t bits2 = (out2 >> ofs) & unique_mask; | |
73 | if (bits1 == bits2) { | |
74 | printf("Partial collision for '%s':\n", name); | |
75 | printf("%s(%08"PRIx32") = %08"PRIx32"\n", name, in1, out1); | |
76 | printf("%s(%08"PRIx32") = %08"PRIx32"\n", name, in2, out2); | |
77 | printf("%d bits of output starting at bit %d " | |
78 | "are both 0x%"PRIx32"\n", min_unique, ofs, bits1); | |
79 | exit(1); | |
80 | } | |
81 | } | |
82 | } | |
83 | } | |
84 | } | |
85 | ||
9879b94f BP |
86 | static void |
87 | check_3word_hash(uint32_t (*hash)(const uint32_t[], size_t, uint32_t), | |
88 | const char *name) | |
064af421 BP |
89 | { |
90 | int i, j; | |
91 | ||
9879b94f BP |
92 | for (i = 0; i <= 96; i++) { |
93 | for (j = i + 1; j <= 96; j++) { | |
94 | uint32_t in1[3], in2[3]; | |
95 | uint32_t out1, out2; | |
96 | const int min_unique = 12; | |
97 | const uint32_t unique_mask = (UINT32_C(1) << min_unique) - 1; | |
98 | ||
99 | set_bit(in1, i); | |
100 | set_bit(in2, j); | |
101 | out1 = hash(in1, 3, 0); | |
102 | out2 = hash(in2, 3, 0); | |
103 | if ((out1 & unique_mask) == (out2 & unique_mask)) { | |
104 | printf("%s has a partial collision:\n", name); | |
105 | printf("hash(1 << %d) == %08"PRIx32"\n", i, out1); | |
106 | printf("hash(1 << %d) == %08"PRIx32"\n", j, out2); | |
107 | printf("The low-order %d bits of output are both " | |
108 | "0x%"PRIx32"\n", min_unique, out1 & unique_mask); | |
109 | } | |
110 | } | |
111 | } | |
112 | } | |
113 | ||
114 | int | |
115 | main(void) | |
116 | { | |
064af421 BP |
117 | /* Check that all hashes computed with hash_words with one 1-bit (or no |
118 | * 1-bits) set within a single 32-bit word have different values in all | |
119 | * 11-bit consecutive runs. | |
120 | * | |
121 | * Given a random distribution, the probability of at least one collision | |
122 | * in any set of 11 bits is approximately | |
123 | * | |
124 | * 1 - ((2**11 - 1)/2**11)**C(33,2) | |
125 | * == 1 - (2047/2048)**528 | |
126 | * =~ 0.22 | |
127 | * | |
128 | * There are 21 ways to pick 11 consecutive bits in a 32-bit word, so if we | |
129 | * assumed independence then the chance of having no collisions in any of | |
130 | * those 11-bit runs would be (1-0.22)**21 =~ .0044. Obviously | |
131 | * independence must be a bad assumption :-) | |
132 | */ | |
133 | check_word_hash(hash_words_cb, "hash_words", 11); | |
c49d1dd1 | 134 | check_word_hash(jhash_words_cb, "jhash_words", 11); |
064af421 BP |
135 | |
136 | /* Check that all hash functions of with one 1-bit (or no 1-bits) set | |
137 | * within three 32-bit words have different values in their lowest 12 | |
138 | * bits. | |
139 | * | |
140 | * Given a random distribution, the probability of at least one collision | |
141 | * in 12 bits is approximately | |
142 | * | |
143 | * 1 - ((2**12 - 1)/2**12)**C(97,2) | |
144 | * == 1 - (4095/4096)**4656 | |
145 | * =~ 0.68 | |
146 | * | |
147 | * so we are doing pretty well to not have any collisions in 12 bits. | |
148 | */ | |
9879b94f | 149 | check_3word_hash(hash_words, "hash_words"); |
c49d1dd1 | 150 | check_3word_hash(jhash_words, "jhash_words"); |
064af421 BP |
151 | |
152 | /* Check that all hashes computed with hash_int with one 1-bit (or no | |
153 | * 1-bits) set within a single 32-bit word have different values in all | |
c49d1dd1 | 154 | * 12-bit consecutive runs. |
064af421 BP |
155 | * |
156 | * Given a random distribution, the probability of at least one collision | |
c49d1dd1 | 157 | * in any set of 12 bits is approximately |
064af421 | 158 | * |
c49d1dd1 BP |
159 | * 1 - ((2**12 - 1)/2**12)**C(33,2) |
160 | * == 1 - (4,095/4,096)**528 | |
161 | * =~ 0.12 | |
064af421 | 162 | * |
c49d1dd1 | 163 | * There are 20 ways to pick 12 consecutive bits in a 32-bit word, so if we |
064af421 | 164 | * assumed independence then the chance of having no collisions in any of |
c49d1dd1 BP |
165 | * those 12-bit runs would be (1-0.12)**20 =~ 0.078. This refutes our |
166 | * assumption of independence, which makes it seem like a good hash | |
167 | * function. | |
064af421 | 168 | */ |
c49d1dd1 | 169 | check_word_hash(hash_int_cb, "hash_int", 12); |
064af421 BP |
170 | |
171 | return 0; | |
172 | } |