git.proxmox.com Git - libgit2.git/blame - tests/libgit2/core/hashsig.c
Merge https://salsa.debian.org/debian/libgit2 into proxmox/bullseye
[libgit2.git] / tests / libgit2 / core / hashsig.c
CommitLineData
ad5611d8
TR
1#include "clar_libgit2.h"
2#include "git2/sys/hashsig.h"
3#include "futils.h"
4
/*
 * Baseline fixture for the similarity tests: fifty lines holding the
 * zero-padded numbers "000" through "049", each terminated by '\n'.
 */
#define SIMILARITY_TEST_DATA_1 \
	"000\n001\n002\n003\n004\n" "005\n006\n007\n008\n009\n" \
	"010\n011\n012\n013\n014\n" "015\n016\n017\n018\n019\n" \
	"020\n021\n022\n023\n024\n" "025\n026\n027\n028\n029\n" \
	"030\n031\n032\n033\n034\n" "035\n036\n037\n038\n039\n" \
	"040\n041\n042\n043\n044\n" "045\n046\n047\n048\n049\n"
11
12void test_core_hashsig__similarity_metric(void)
13{
14 git_hashsig *a, *b;
15 git_str buf = GIT_STR_INIT;
16 int sim;
17
18 /* in the first case, we compare data to itself and expect 100% match */
19
20 cl_git_pass(git_str_sets(&buf, SIMILARITY_TEST_DATA_1));
21 cl_git_pass(git_hashsig_create(&a, buf.ptr, buf.size, GIT_HASHSIG_NORMAL));
22 cl_git_pass(git_hashsig_create(&b, buf.ptr, buf.size, GIT_HASHSIG_NORMAL));
23
24 cl_assert_equal_i(100, git_hashsig_compare(a, b));
25
26 git_hashsig_free(a);
27 git_hashsig_free(b);
28
29 /* if we change just a single byte, how much does that change magnify? */
30
31 cl_git_pass(git_str_sets(&buf, SIMILARITY_TEST_DATA_1));
32 cl_git_pass(git_hashsig_create(&a, buf.ptr, buf.size, GIT_HASHSIG_NORMAL));
33 cl_git_pass(git_str_sets(&buf,
34 "000\n001\n002\n003\n004\n005\n006\n007\n008\n009\n" \
35 "010\n011\n012\n013\n014\n015\n016\n017\n018\n019\n" \
36 "x020x\n021\n022\n023\n024\n025\n026\n027\n028\n029\n" \
37 "030\n031\n032\n033\n034\n035\n036\n037\n038\n039\n" \
38 "040\n041\n042\n043\n044\n045\n046\n047\n048\n049\n"
39 ));
40 cl_git_pass(git_hashsig_create(&b, buf.ptr, buf.size, GIT_HASHSIG_NORMAL));
41
42 sim = git_hashsig_compare(a, b);
43
44 cl_assert_in_range(95, sim, 100); /* expect >95% similarity */
45
46 git_hashsig_free(a);
47 git_hashsig_free(b);
48
49 /* let's try comparing data to a superset of itself */
50
51 cl_git_pass(git_str_sets(&buf, SIMILARITY_TEST_DATA_1));
52 cl_git_pass(git_hashsig_create(&a, buf.ptr, buf.size, GIT_HASHSIG_NORMAL));
53 cl_git_pass(git_str_sets(&buf, SIMILARITY_TEST_DATA_1
54 "050\n051\n052\n053\n054\n055\n056\n057\n058\n059\n"));
55 cl_git_pass(git_hashsig_create(&b, buf.ptr, buf.size, GIT_HASHSIG_NORMAL));
56
57 sim = git_hashsig_compare(a, b);
58 /* 20% lines added ~= 10% lines changed */
59
60 cl_assert_in_range(85, sim, 95); /* expect similarity around 90% */
61
62 git_hashsig_free(a);
63 git_hashsig_free(b);
64
65 /* what if we keep about half the original data and add half new */
66
67 cl_git_pass(git_str_sets(&buf, SIMILARITY_TEST_DATA_1));
68 cl_git_pass(git_hashsig_create(&a, buf.ptr, buf.size, GIT_HASHSIG_NORMAL));
69 cl_git_pass(git_str_sets(&buf,
70 "000\n001\n002\n003\n004\n005\n006\n007\n008\n009\n" \
71 "010\n011\n012\n013\n014\n015\n016\n017\n018\n019\n" \
72 "020x\n021\n022\n023\n024\n" \
73 "x25\nx26\nx27\nx28\nx29\n" \
74 "x30\nx31\nx32\nx33\nx34\nx35\nx36\nx37\nx38\nx39\n" \
75 "x40\nx41\nx42\nx43\nx44\nx45\nx46\nx47\nx48\nx49\n"
76 ));
77 cl_git_pass(git_hashsig_create(&b, buf.ptr, buf.size, GIT_HASHSIG_NORMAL));
78
79 sim = git_hashsig_compare(a, b);
80 /* 50% lines changed */
81
82 cl_assert_in_range(40, sim, 60); /* expect in the 40-60% similarity range */
83
84 git_hashsig_free(a);
85 git_hashsig_free(b);
86
87 /* lastly, let's check that we can hash file content as well */
88
89 cl_git_pass(git_str_sets(&buf, SIMILARITY_TEST_DATA_1));
90 cl_git_pass(git_hashsig_create(&a, buf.ptr, buf.size, GIT_HASHSIG_NORMAL));
91
92 cl_git_pass(git_futils_mkdir("scratch", 0755, GIT_MKDIR_PATH));
93 cl_git_mkfile("scratch/testdata", SIMILARITY_TEST_DATA_1);
94 cl_git_pass(git_hashsig_create_fromfile(
95 &b, "scratch/testdata", GIT_HASHSIG_NORMAL));
96
97 cl_assert_equal_i(100, git_hashsig_compare(a, b));
98
99 git_hashsig_free(a);
100 git_hashsig_free(b);
101
102 git_str_dispose(&buf);
103 git_futils_rmdir_r("scratch", NULL, GIT_RMDIR_REMOVE_FILES);
104}
105
106void test_core_hashsig__similarity_metric_whitespace(void)
107{
108 git_hashsig *a, *b;
109 git_str buf = GIT_STR_INIT;
110 int sim, i, j;
111 git_hashsig_option_t opt;
112 const char *tabbed =
113 " for (s = 0; s < sizeof(sep) / sizeof(char); ++s) {\n"
114 " separator = sep[s];\n"
115 " expect = expect_values[s];\n"
116 "\n"
117 " for (j = 0; j < sizeof(b) / sizeof(char*); ++j) {\n"
118 " for (i = 0; i < sizeof(a) / sizeof(char*); ++i) {\n"
119 " git_str_join(&buf, separator, a[i], b[j]);\n"
120 " cl_assert_equal_s(*expect, buf.ptr);\n"
121 " expect++;\n"
122 " }\n"
123 " }\n"
124 " }\n";
125 const char *spaced =
126 " for (s = 0; s < sizeof(sep) / sizeof(char); ++s) {\n"
127 " separator = sep[s];\n"
128 " expect = expect_values[s];\n"
129 "\n"
130 " for (j = 0; j < sizeof(b) / sizeof(char*); ++j) {\n"
131 " for (i = 0; i < sizeof(a) / sizeof(char*); ++i) {\n"
132 " git_str_join(&buf, separator, a[i], b[j]);\n"
133 " cl_assert_equal_s(*expect, buf.ptr);\n"
134 " expect++;\n"
135 " }\n"
136 " }\n"
137 " }\n";
138 const char *crlf_spaced2 =
139 " for (s = 0; s < sizeof(sep) / sizeof(char); ++s) {\r\n"
140 " separator = sep[s];\r\n"
141 " expect = expect_values[s];\r\n"
142 "\r\n"
143 " for (j = 0; j < sizeof(b) / sizeof(char*); ++j) {\r\n"
144 " for (i = 0; i < sizeof(a) / sizeof(char*); ++i) {\r\n"
145 " git_str_join(&buf, separator, a[i], b[j]);\r\n"
146 " cl_assert_equal_s(*expect, buf.ptr);\r\n"
147 " expect++;\r\n"
148 " }\r\n"
149 " }\r\n"
150 " }\r\n";
151 const char *text[3] = { tabbed, spaced, crlf_spaced2 };
152
153 /* let's try variations of our own code with whitespace changes */
154
155 for (opt = GIT_HASHSIG_NORMAL; opt <= GIT_HASHSIG_SMART_WHITESPACE; ++opt) {
156 for (i = 0; i < 3; ++i) {
157 for (j = 0; j < 3; ++j) {
158 cl_git_pass(git_str_sets(&buf, text[i]));
159 cl_git_pass(git_hashsig_create(&a, buf.ptr, buf.size, opt));
160
161 cl_git_pass(git_str_sets(&buf, text[j]));
162 cl_git_pass(git_hashsig_create(&b, buf.ptr, buf.size, opt));
163
164 sim = git_hashsig_compare(a, b);
165
166 if (opt == GIT_HASHSIG_NORMAL) {
167 if (i == j)
168 cl_assert_equal_i(100, sim);
169 else
170 cl_assert_in_range(0, sim, 30); /* pretty different */
171 } else {
172 cl_assert_equal_i(100, sim);
173 }
174
175 git_hashsig_free(a);
176 git_hashsig_free(b);
177 }
178 }
179 }
180
181 git_str_dispose(&buf);
182}