]>
Commit | Line | Data |
---|---|---|
ad5611d8 TR |
1 | #include "clar_libgit2.h" |
2 | #include "git2/sys/hashsig.h" | |
3 | #include "futils.h" | |
4 | ||
/*
 * Baseline fixture: fifty newline-terminated lines holding the zero-padded
 * numbers 000 through 049, in ascending order.
 */
#define SIMILARITY_TEST_DATA_1 \
	"000\n001\n002\n003\n004\n" "005\n006\n007\n008\n009\n" \
	"010\n011\n012\n013\n014\n" "015\n016\n017\n018\n019\n" \
	"020\n021\n022\n023\n024\n" "025\n026\n027\n028\n029\n" \
	"030\n031\n032\n033\n034\n" "035\n036\n037\n038\n039\n" \
	"040\n041\n042\n043\n044\n" "045\n046\n047\n048\n049\n"
11 | ||
12 | void test_core_hashsig__similarity_metric(void) | |
13 | { | |
14 | git_hashsig *a, *b; | |
15 | git_str buf = GIT_STR_INIT; | |
16 | int sim; | |
17 | ||
18 | /* in the first case, we compare data to itself and expect 100% match */ | |
19 | ||
20 | cl_git_pass(git_str_sets(&buf, SIMILARITY_TEST_DATA_1)); | |
21 | cl_git_pass(git_hashsig_create(&a, buf.ptr, buf.size, GIT_HASHSIG_NORMAL)); | |
22 | cl_git_pass(git_hashsig_create(&b, buf.ptr, buf.size, GIT_HASHSIG_NORMAL)); | |
23 | ||
24 | cl_assert_equal_i(100, git_hashsig_compare(a, b)); | |
25 | ||
26 | git_hashsig_free(a); | |
27 | git_hashsig_free(b); | |
28 | ||
29 | /* if we change just a single byte, how much does that change magnify? */ | |
30 | ||
31 | cl_git_pass(git_str_sets(&buf, SIMILARITY_TEST_DATA_1)); | |
32 | cl_git_pass(git_hashsig_create(&a, buf.ptr, buf.size, GIT_HASHSIG_NORMAL)); | |
33 | cl_git_pass(git_str_sets(&buf, | |
34 | "000\n001\n002\n003\n004\n005\n006\n007\n008\n009\n" \ | |
35 | "010\n011\n012\n013\n014\n015\n016\n017\n018\n019\n" \ | |
36 | "x020x\n021\n022\n023\n024\n025\n026\n027\n028\n029\n" \ | |
37 | "030\n031\n032\n033\n034\n035\n036\n037\n038\n039\n" \ | |
38 | "040\n041\n042\n043\n044\n045\n046\n047\n048\n049\n" | |
39 | )); | |
40 | cl_git_pass(git_hashsig_create(&b, buf.ptr, buf.size, GIT_HASHSIG_NORMAL)); | |
41 | ||
42 | sim = git_hashsig_compare(a, b); | |
43 | ||
44 | cl_assert_in_range(95, sim, 100); /* expect >95% similarity */ | |
45 | ||
46 | git_hashsig_free(a); | |
47 | git_hashsig_free(b); | |
48 | ||
49 | /* let's try comparing data to a superset of itself */ | |
50 | ||
51 | cl_git_pass(git_str_sets(&buf, SIMILARITY_TEST_DATA_1)); | |
52 | cl_git_pass(git_hashsig_create(&a, buf.ptr, buf.size, GIT_HASHSIG_NORMAL)); | |
53 | cl_git_pass(git_str_sets(&buf, SIMILARITY_TEST_DATA_1 | |
54 | "050\n051\n052\n053\n054\n055\n056\n057\n058\n059\n")); | |
55 | cl_git_pass(git_hashsig_create(&b, buf.ptr, buf.size, GIT_HASHSIG_NORMAL)); | |
56 | ||
57 | sim = git_hashsig_compare(a, b); | |
58 | /* 20% lines added ~= 10% lines changed */ | |
59 | ||
60 | cl_assert_in_range(85, sim, 95); /* expect similarity around 90% */ | |
61 | ||
62 | git_hashsig_free(a); | |
63 | git_hashsig_free(b); | |
64 | ||
65 | /* what if we keep about half the original data and add half new */ | |
66 | ||
67 | cl_git_pass(git_str_sets(&buf, SIMILARITY_TEST_DATA_1)); | |
68 | cl_git_pass(git_hashsig_create(&a, buf.ptr, buf.size, GIT_HASHSIG_NORMAL)); | |
69 | cl_git_pass(git_str_sets(&buf, | |
70 | "000\n001\n002\n003\n004\n005\n006\n007\n008\n009\n" \ | |
71 | "010\n011\n012\n013\n014\n015\n016\n017\n018\n019\n" \ | |
72 | "020x\n021\n022\n023\n024\n" \ | |
73 | "x25\nx26\nx27\nx28\nx29\n" \ | |
74 | "x30\nx31\nx32\nx33\nx34\nx35\nx36\nx37\nx38\nx39\n" \ | |
75 | "x40\nx41\nx42\nx43\nx44\nx45\nx46\nx47\nx48\nx49\n" | |
76 | )); | |
77 | cl_git_pass(git_hashsig_create(&b, buf.ptr, buf.size, GIT_HASHSIG_NORMAL)); | |
78 | ||
79 | sim = git_hashsig_compare(a, b); | |
80 | /* 50% lines changed */ | |
81 | ||
82 | cl_assert_in_range(40, sim, 60); /* expect in the 40-60% similarity range */ | |
83 | ||
84 | git_hashsig_free(a); | |
85 | git_hashsig_free(b); | |
86 | ||
87 | /* lastly, let's check that we can hash file content as well */ | |
88 | ||
89 | cl_git_pass(git_str_sets(&buf, SIMILARITY_TEST_DATA_1)); | |
90 | cl_git_pass(git_hashsig_create(&a, buf.ptr, buf.size, GIT_HASHSIG_NORMAL)); | |
91 | ||
92 | cl_git_pass(git_futils_mkdir("scratch", 0755, GIT_MKDIR_PATH)); | |
93 | cl_git_mkfile("scratch/testdata", SIMILARITY_TEST_DATA_1); | |
94 | cl_git_pass(git_hashsig_create_fromfile( | |
95 | &b, "scratch/testdata", GIT_HASHSIG_NORMAL)); | |
96 | ||
97 | cl_assert_equal_i(100, git_hashsig_compare(a, b)); | |
98 | ||
99 | git_hashsig_free(a); | |
100 | git_hashsig_free(b); | |
101 | ||
102 | git_str_dispose(&buf); | |
103 | git_futils_rmdir_r("scratch", NULL, GIT_RMDIR_REMOVE_FILES); | |
104 | } | |
105 | ||
106 | void test_core_hashsig__similarity_metric_whitespace(void) | |
107 | { | |
108 | git_hashsig *a, *b; | |
109 | git_str buf = GIT_STR_INIT; | |
110 | int sim, i, j; | |
111 | git_hashsig_option_t opt; | |
112 | const char *tabbed = | |
113 | " for (s = 0; s < sizeof(sep) / sizeof(char); ++s) {\n" | |
114 | " separator = sep[s];\n" | |
115 | " expect = expect_values[s];\n" | |
116 | "\n" | |
117 | " for (j = 0; j < sizeof(b) / sizeof(char*); ++j) {\n" | |
118 | " for (i = 0; i < sizeof(a) / sizeof(char*); ++i) {\n" | |
119 | " git_str_join(&buf, separator, a[i], b[j]);\n" | |
120 | " cl_assert_equal_s(*expect, buf.ptr);\n" | |
121 | " expect++;\n" | |
122 | " }\n" | |
123 | " }\n" | |
124 | " }\n"; | |
125 | const char *spaced = | |
126 | " for (s = 0; s < sizeof(sep) / sizeof(char); ++s) {\n" | |
127 | " separator = sep[s];\n" | |
128 | " expect = expect_values[s];\n" | |
129 | "\n" | |
130 | " for (j = 0; j < sizeof(b) / sizeof(char*); ++j) {\n" | |
131 | " for (i = 0; i < sizeof(a) / sizeof(char*); ++i) {\n" | |
132 | " git_str_join(&buf, separator, a[i], b[j]);\n" | |
133 | " cl_assert_equal_s(*expect, buf.ptr);\n" | |
134 | " expect++;\n" | |
135 | " }\n" | |
136 | " }\n" | |
137 | " }\n"; | |
138 | const char *crlf_spaced2 = | |
139 | " for (s = 0; s < sizeof(sep) / sizeof(char); ++s) {\r\n" | |
140 | " separator = sep[s];\r\n" | |
141 | " expect = expect_values[s];\r\n" | |
142 | "\r\n" | |
143 | " for (j = 0; j < sizeof(b) / sizeof(char*); ++j) {\r\n" | |
144 | " for (i = 0; i < sizeof(a) / sizeof(char*); ++i) {\r\n" | |
145 | " git_str_join(&buf, separator, a[i], b[j]);\r\n" | |
146 | " cl_assert_equal_s(*expect, buf.ptr);\r\n" | |
147 | " expect++;\r\n" | |
148 | " }\r\n" | |
149 | " }\r\n" | |
150 | " }\r\n"; | |
151 | const char *text[3] = { tabbed, spaced, crlf_spaced2 }; | |
152 | ||
153 | /* let's try variations of our own code with whitespace changes */ | |
154 | ||
155 | for (opt = GIT_HASHSIG_NORMAL; opt <= GIT_HASHSIG_SMART_WHITESPACE; ++opt) { | |
156 | for (i = 0; i < 3; ++i) { | |
157 | for (j = 0; j < 3; ++j) { | |
158 | cl_git_pass(git_str_sets(&buf, text[i])); | |
159 | cl_git_pass(git_hashsig_create(&a, buf.ptr, buf.size, opt)); | |
160 | ||
161 | cl_git_pass(git_str_sets(&buf, text[j])); | |
162 | cl_git_pass(git_hashsig_create(&b, buf.ptr, buf.size, opt)); | |
163 | ||
164 | sim = git_hashsig_compare(a, b); | |
165 | ||
166 | if (opt == GIT_HASHSIG_NORMAL) { | |
167 | if (i == j) | |
168 | cl_assert_equal_i(100, sim); | |
169 | else | |
170 | cl_assert_in_range(0, sim, 30); /* pretty different */ | |
171 | } else { | |
172 | cl_assert_equal_i(100, sim); | |
173 | } | |
174 | ||
175 | git_hashsig_free(a); | |
176 | git_hashsig_free(b); | |
177 | } | |
178 | } | |
179 | } | |
180 | ||
181 | git_str_dispose(&buf); | |
182 | } |