]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | #include "tests.h" |
2 | #include <ctype.h> | |
3 | #include <wchar.h> | |
4 | ||
5 | static int tests; | |
6 | static int error; | |
7 | ||
/*
 * Test helpers: store `val` at buf[pos], then run testbytes() on the first
 * `len` bytes of buf.
 *
 *   CHECKVALID   expects utf8proc_iterate() to return `len`.
 *   CHECKINVALID expects it to return UTF8PROC_ERROR_INVALIDUTF8.
 *
 * Both macros require an `unsigned char buf[]` to be in scope at the call
 * site. They are wrapped in do { } while (0) so that each expands to exactly
 * one statement and stays correct under an unbraced if/else or loop.
 * Note that CHECKVALID evaluates `len` twice; pass side-effect-free values.
 */
#define CHECKVALID(pos, val, len) \
    do { \
        buf[(pos)] = (val); \
        testbytes(buf, (len), (len), __LINE__); \
    } while (0)
#define CHECKINVALID(pos, val, len) \
    do { \
        buf[(pos)] = (val); \
        testbytes(buf, (len), UTF8PROC_ERROR_INVALIDUTF8, __LINE__); \
    } while (0)
10 | ||
11 | static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int line) | |
12 | { | |
13 | utf8proc_int32_t out[16]; | |
14 | utf8proc_ssize_t ret; | |
15 | ||
16 | /* Make a copy to ensure that memory is left uninitialized after "len" | |
17 | * bytes. This way, Valgrind can detect overreads. | |
18 | */ | |
19 | unsigned char tmp[16]; | |
20 | memcpy(tmp, buf, len); | |
21 | ||
22 | tests++; | |
23 | if ((ret = utf8proc_iterate(tmp, len, out)) != retval) { | |
24 | fprintf(stderr, "Failed (%d):", line); | |
25 | for (int i = 0; i < len ; i++) { | |
26 | fprintf(stderr, " 0x%02x", tmp[i]); | |
27 | } | |
28 | fprintf(stderr, " -> %zd\n", ret); | |
29 | error++; | |
30 | } | |
31 | } | |
32 | ||
33 | int main(int argc, char **argv) | |
34 | { | |
35 | uint32_t byt; | |
36 | unsigned char buf[16]; | |
37 | ||
38 | tests = error = 0; | |
39 | ||
40 | // Check valid sequences that were considered valid erroneously before | |
41 | buf[0] = 0xef; | |
42 | buf[1] = 0xb7; | |
43 | for (byt = 0x90; byt < 0xa0; byt++) { | |
44 | CHECKVALID(2, byt, 3); | |
45 | } | |
46 | // Check 0xfffe and 0xffff | |
47 | buf[1] = 0xbf; | |
48 | CHECKVALID(2, 0xbe, 3); | |
49 | CHECKVALID(2, 0xbf, 3); | |
50 | // Check 0x??fffe & 0x??ffff | |
51 | for (byt = 0x1fffe; byt < 0x110000; byt += 0x10000) { | |
52 | buf[0] = 0xf0 | (byt >> 18); | |
53 | buf[1] = 0x80 | ((byt >> 12) & 0x3f); | |
54 | CHECKVALID(3, 0xbe, 4); | |
55 | CHECKVALID(3, 0xbf, 4); | |
56 | } | |
57 | ||
58 | // Continuation byte not after lead | |
59 | for (byt = 0x80; byt < 0xc0; byt++) { | |
60 | CHECKINVALID(0, byt, 1); | |
61 | } | |
62 | ||
63 | // Continuation byte not after lead | |
64 | for (byt = 0x80; byt < 0xc0; byt++) { | |
65 | CHECKINVALID(0, byt, 1); | |
66 | } | |
67 | ||
68 | // Test lead bytes | |
69 | for (byt = 0xc0; byt <= 0xff; byt++) { | |
70 | // Single lead byte at end of string | |
71 | CHECKINVALID(0, byt, 1); | |
72 | // Lead followed by non-continuation character < 0x80 | |
73 | CHECKINVALID(1, 65, 2); | |
74 | // Lead followed by non-continuation character > 0xbf | |
75 | CHECKINVALID(1, 0xc0, 2); | |
76 | } | |
77 | ||
78 | // Test overlong 2-byte | |
79 | buf[0] = 0xc0; | |
80 | for (byt = 0x81; byt <= 0xbf; byt++) { | |
81 | CHECKINVALID(1, byt, 2); | |
82 | } | |
83 | buf[0] = 0xc1; | |
84 | for (byt = 0x80; byt <= 0xbf; byt++) { | |
85 | CHECKINVALID(1, byt, 2); | |
86 | } | |
87 | ||
88 | // Test overlong 3-byte | |
89 | buf[0] = 0xe0; | |
90 | buf[2] = 0x80; | |
91 | for (byt = 0x80; byt <= 0x9f; byt++) { | |
92 | CHECKINVALID(1, byt, 3); | |
93 | } | |
94 | ||
95 | // Test overlong 4-byte | |
96 | buf[0] = 0xf0; | |
97 | buf[2] = 0x80; | |
98 | buf[3] = 0x80; | |
99 | for (byt = 0x80; byt <= 0x8f; byt++) { | |
100 | CHECKINVALID(1, byt, 4); | |
101 | } | |
102 | ||
103 | // Test 4-byte > 0x10ffff | |
104 | buf[0] = 0xf4; | |
105 | buf[2] = 0x80; | |
106 | buf[3] = 0x80; | |
107 | for (byt = 0x90; byt <= 0xbf; byt++) { | |
108 | CHECKINVALID(1, byt, 4); | |
109 | } | |
110 | buf[1] = 0x80; | |
111 | for (byt = 0xf5; byt <= 0xf7; byt++) { | |
112 | CHECKINVALID(0, byt, 4); | |
113 | } | |
114 | ||
115 | // Test 5-byte | |
116 | buf[4] = 0x80; | |
117 | for (byt = 0xf8; byt <= 0xfb; byt++) { | |
118 | CHECKINVALID(0, byt, 5); | |
119 | } | |
120 | ||
121 | // Test 6-byte | |
122 | buf[5] = 0x80; | |
123 | for (byt = 0xfc; byt <= 0xfd; byt++) { | |
124 | CHECKINVALID(0, byt, 6); | |
125 | } | |
126 | ||
127 | // Test 7-byte | |
128 | buf[6] = 0x80; | |
129 | CHECKINVALID(0, 0xfe, 7); | |
130 | ||
131 | // Three and above byte sequences | |
132 | for (byt = 0xe0; byt < 0xf0; byt++) { | |
133 | // Lead followed by only 1 continuation byte | |
134 | CHECKINVALID(0, byt, 2); | |
135 | // Lead ended by non-continuation character < 0x80 | |
136 | CHECKINVALID(2, 65, 3); | |
137 | // Lead ended by non-continuation character > 0xbf | |
138 | CHECKINVALID(2, 0xc0, 3); | |
139 | } | |
140 | ||
141 | // 3-byte encoded surrogate character(s) | |
142 | buf[0] = 0xed; buf[2] = 0x80; | |
143 | // Single surrogate | |
144 | CHECKINVALID(1, 0xa0, 3); | |
145 | // Trailing surrogate first | |
146 | CHECKINVALID(1, 0xb0, 3); | |
147 | ||
148 | // Four byte sequences | |
149 | buf[1] = 0x80; | |
150 | for (byt = 0xf0; byt < 0xf5; byt++) { | |
151 | // Lead followed by only 1 continuation bytes | |
152 | CHECKINVALID(0, byt, 2); | |
153 | // Lead followed by only 2 continuation bytes | |
154 | CHECKINVALID(0, byt, 3); | |
155 | // Lead followed by non-continuation character < 0x80 | |
156 | CHECKINVALID(3, 65, 4); | |
157 | // Lead followed by non-continuation character > 0xbf | |
158 | CHECKINVALID(3, 0xc0, 4); | |
159 | ||
160 | } | |
161 | ||
162 | check(!error, "utf8proc_iterate FAILED %d tests out of %d", error, tests); | |
163 | printf("utf8proc_iterate tests SUCCEEDED, (%d) tests passed.\n", tests); | |
164 | ||
165 | return 0; | |
166 | } |