]>
git.proxmox.com Git - ceph.git/blob - ceph/src/utf8proc/test/iterate.c
/*
 * Test helpers: store `val` at buf[pos] in the caller's local `buf`, then run
 * testbytes() on the first `len` bytes of it.
 *
 *   CHECKVALID   expects utf8proc_iterate() to return `len`.
 *   CHECKINVALID expects utf8proc_iterate() to return
 *                UTF8PROC_ERROR_INVALIDUTF8.
 *
 * Both are wrapped in do { ... } while (0) so that each expands to a single
 * statement; without the wrapper, only the store would be guarded when the
 * macro is used as the body of an unbraced if/else/loop.
 *
 * Note: `len` is expanded twice by CHECKVALID, so pass an expression without
 * side effects.
 */
#define CHECKVALID(pos, val, len) \
    do { \
        buf[(pos)] = (val); \
        testbytes(buf, (len), (len), __LINE__); \
    } while (0)
#define CHECKINVALID(pos, val, len) \
    do { \
        buf[(pos)] = (val); \
        testbytes(buf, (len), UTF8PROC_ERROR_INVALIDUTF8, __LINE__); \
    } while (0)
11 static void testbytes(unsigned char *buf
, int len
, utf8proc_ssize_t retval
, int line
)
13 utf8proc_int32_t out
[16];
16 /* Make a copy to ensure that memory is left uninitialized after "len"
17 * bytes. This way, Valgrind can detect overreads.
19 unsigned char tmp
[16];
20 memcpy(tmp
, buf
, len
);
23 if ((ret
= utf8proc_iterate(tmp
, len
, out
)) != retval
) {
24 fprintf(stderr
, "Failed (%d):", line
);
25 for (int i
= 0; i
< len
; i
++) {
26 fprintf(stderr
, " 0x%02x", tmp
[i
]);
28 fprintf(stderr
, " -> %zd\n", ret
);
33 int main(int argc
, char **argv
)
36 unsigned char buf
[16];
40 // Check valid sequences that were considered valid erroneously before
43 for (byt
= 0x90; byt
< 0xa0; byt
++) {
44 CHECKVALID(2, byt
, 3);
46 // Check 0xfffe and 0xffff
48 CHECKVALID(2, 0xbe, 3);
49 CHECKVALID(2, 0xbf, 3);
50 // Check 0x??fffe & 0x??ffff
51 for (byt
= 0x1fffe; byt
< 0x110000; byt
+= 0x10000) {
52 buf
[0] = 0xf0 | (byt
>> 18);
53 buf
[1] = 0x80 | ((byt
>> 12) & 0x3f);
54 CHECKVALID(3, 0xbe, 4);
55 CHECKVALID(3, 0xbf, 4);
58 // Continuation byte not after lead
59 for (byt
= 0x80; byt
< 0xc0; byt
++) {
60 CHECKINVALID(0, byt
, 1);
63 // Continuation byte not after lead
64 for (byt
= 0x80; byt
< 0xc0; byt
++) {
65 CHECKINVALID(0, byt
, 1);
69 for (byt
= 0xc0; byt
<= 0xff; byt
++) {
70 // Single lead byte at end of string
71 CHECKINVALID(0, byt
, 1);
72 // Lead followed by non-continuation character < 0x80
73 CHECKINVALID(1, 65, 2);
74 // Lead followed by non-continuation character > 0xbf
75 CHECKINVALID(1, 0xc0, 2);
78 // Test overlong 2-byte
80 for (byt
= 0x81; byt
<= 0xbf; byt
++) {
81 CHECKINVALID(1, byt
, 2);
84 for (byt
= 0x80; byt
<= 0xbf; byt
++) {
85 CHECKINVALID(1, byt
, 2);
88 // Test overlong 3-byte
91 for (byt
= 0x80; byt
<= 0x9f; byt
++) {
92 CHECKINVALID(1, byt
, 3);
95 // Test overlong 4-byte
99 for (byt
= 0x80; byt
<= 0x8f; byt
++) {
100 CHECKINVALID(1, byt
, 4);
103 // Test 4-byte > 0x10ffff
107 for (byt
= 0x90; byt
<= 0xbf; byt
++) {
108 CHECKINVALID(1, byt
, 4);
111 for (byt
= 0xf5; byt
<= 0xf7; byt
++) {
112 CHECKINVALID(0, byt
, 4);
117 for (byt
= 0xf8; byt
<= 0xfb; byt
++) {
118 CHECKINVALID(0, byt
, 5);
123 for (byt
= 0xfc; byt
<= 0xfd; byt
++) {
124 CHECKINVALID(0, byt
, 6);
129 CHECKINVALID(0, 0xfe, 7);
131 // Three and above byte sequences
132 for (byt
= 0xe0; byt
< 0xf0; byt
++) {
133 // Lead followed by only 1 continuation byte
134 CHECKINVALID(0, byt
, 2);
135 // Lead ended by non-continuation character < 0x80
136 CHECKINVALID(2, 65, 3);
137 // Lead ended by non-continuation character > 0xbf
138 CHECKINVALID(2, 0xc0, 3);
141 // 3-byte encoded surrogate character(s)
142 buf
[0] = 0xed; buf
[2] = 0x80;
144 CHECKINVALID(1, 0xa0, 3);
145 // Trailing surrogate first
146 CHECKINVALID(1, 0xb0, 3);
148 // Four byte sequences
150 for (byt
= 0xf0; byt
< 0xf5; byt
++) {
151 // Lead followed by only 1 continuation bytes
152 CHECKINVALID(0, byt
, 2);
153 // Lead followed by only 2 continuation bytes
154 CHECKINVALID(0, byt
, 3);
155 // Lead followed by non-continuation character < 0x80
156 CHECKINVALID(3, 65, 4);
157 // Lead followed by non-continuation character > 0xbf
158 CHECKINVALID(3, 0xc0, 4);
162 check(!error
, "utf8proc_iterate FAILED %d tests out of %d", error
, tests
);
163 printf("utf8proc_iterate tests SUCCEEDED, (%d) tests passed.\n", tests
);