]>
Commit | Line | Data |
---|---|---|
ea8adc8c XL |
1 | // Copyright 2015 Google Inc. All rights reserved. |
2 | // | |
3 | // Permission is hereby granted, free of charge, to any person obtaining a copy | |
4 | // of this software and associated documentation files (the "Software"), to deal | |
5 | // in the Software without restriction, including without limitation the rights | |
6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
7 | // copies of the Software, and to permit persons to whom the Software is | |
8 | // furnished to do so, subject to the following conditions: | |
9 | // | |
10 | // The above copyright notice and this permission notice shall be included in | |
11 | // all copies or substantial portions of the Software. | |
12 | // | |
13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
19 | // THE SOFTWARE. | |
20 | ||
21 | //! CommonMark punctuation set based on spec and Unicode properties. | |
22 | ||
23 | // Autogenerated by mk_puncttable.py | |
24 | ||
/// Punctuation bitmasks for the ASCII range, one `u16` per block of 16
/// code points.
///
/// Entry `i` describes code points `16 * i ..= 16 * i + 15`; bit `b`
/// (counting from the least significant bit) is set when code point
/// `16 * i + b` is punctuation.
const PUNCT_MASKS_ASCII: [u16; 8] = [
    0x0000, // U+0000..=U+000F: none
    0x0000, // U+0010..=U+001F: none
    0xfffe, // U+0020..=U+002F: ! " # $ % & ' ( ) * + , - . /  (space excluded)
    0xfc00, // U+0030..=U+003F: : ; < = > ?
    0x0001, // U+0040..=U+004F: @
    0xf800, // U+0050..=U+005F: [ \ ] ^ _
    0x0001, // U+0060..=U+006F: `
    0x7800, // U+0070..=U+007F: { | } ~
];
35 | ||
/// Block numbers (code point divided by 16) of the non-ASCII blocks that
/// have an entry in `PUNCT_MASKS`.
///
/// The two tables are parallel: the mask for `PUNCT_TAB[i]` is
/// `PUNCT_MASKS[i]`. Entries must stay in strictly ascending order because
/// `is_punctuation` locates a block with a binary search.
const PUNCT_TAB: [u16; 132] = [
    10, // U+00A0..=U+00AF
    11, // U+00B0..=U+00BF
    55, // U+0370..=U+037F
    56, // U+0380..=U+038F
    85, // U+0550..=U+055F
    88, // U+0580..=U+058F
    91, // U+05B0..=U+05BF
    92, // U+05C0..=U+05CF
    95, // U+05F0..=U+05FF
    96, // U+0600..=U+060F
    97, // U+0610..=U+061F
    102, // U+0660..=U+066F
    109, // U+06D0..=U+06DF
    112, // U+0700..=U+070F
    127, // U+07F0..=U+07FF
    131, // U+0830..=U+083F
    133, // U+0850..=U+085F
    150, // U+0960..=U+096F
    151, // U+0970..=U+097F
    175, // U+0AF0..=U+0AFF
    223, // U+0DF0..=U+0DFF
    228, // U+0E40..=U+0E4F
    229, // U+0E50..=U+0E5F
    240, // U+0F00..=U+0F0F
    241, // U+0F10..=U+0F1F
    243, // U+0F30..=U+0F3F
    248, // U+0F80..=U+0F8F
    253, // U+0FD0..=U+0FDF
    260, // U+1040..=U+104F
    271, // U+10F0..=U+10FF
    310, // U+1360..=U+136F
    320, // U+1400..=U+140F
    358, // U+1660..=U+166F
    361, // U+1690..=U+169F
    366, // U+16E0..=U+16EF
    371, // U+1730..=U+173F
    381, // U+17D0..=U+17DF
    384, // U+1800..=U+180F
    404, // U+1940..=U+194F
    417, // U+1A10..=U+1A1F
    426, // U+1AA0..=U+1AAF
    437, // U+1B50..=U+1B5F
    438, // U+1B60..=U+1B6F
    447, // U+1BF0..=U+1BFF
    451, // U+1C30..=U+1C3F
    455, // U+1C70..=U+1C7F
    460, // U+1CC0..=U+1CCF
    461, // U+1CD0..=U+1CDF
    513, // U+2010..=U+201F
    514, // U+2020..=U+202F
    515, // U+2030..=U+203F
    516, // U+2040..=U+204F
    517, // U+2050..=U+205F
    519, // U+2070..=U+207F
    520, // U+2080..=U+208F
    560, // U+2300..=U+230F
    562, // U+2320..=U+232F
    630, // U+2760..=U+276F
    631, // U+2770..=U+277F
    636, // U+27C0..=U+27CF
    638, // U+27E0..=U+27EF
    664, // U+2980..=U+298F
    665, // U+2990..=U+299F
    669, // U+29D0..=U+29DF
    671, // U+29F0..=U+29FF
    719, // U+2CF0..=U+2CFF
    727, // U+2D70..=U+2D7F
    736, // U+2E00..=U+2E0F
    737, // U+2E10..=U+2E1F
    738, // U+2E20..=U+2E2F
    739, // U+2E30..=U+2E3F
    740, // U+2E40..=U+2E4F
    768, // U+3000..=U+300F
    769, // U+3010..=U+301F
    771, // U+3030..=U+303F
    778, // U+30A0..=U+30AF
    783, // U+30F0..=U+30FF
    2639, // U+A4F0..=U+A4FF
    2656, // U+A600..=U+A60F
    2663, // U+A670..=U+A67F
    2671, // U+A6F0..=U+A6FF
    2695, // U+A870..=U+A87F
    2700, // U+A8C0..=U+A8CF
    2703, // U+A8F0..=U+A8FF
    2706, // U+A920..=U+A92F
    2709, // U+A950..=U+A95F
    2716, // U+A9C0..=U+A9CF
    2717, // U+A9D0..=U+A9DF
    2725, // U+AA50..=U+AA5F
    2733, // U+AAD0..=U+AADF
    2735, // U+AAF0..=U+AAFF
    2750, // U+ABE0..=U+ABEF
    4051, // U+FD30..=U+FD3F
    4065, // U+FE10..=U+FE1F
    4067, // U+FE30..=U+FE3F
    4068, // U+FE40..=U+FE4F
    4069, // U+FE50..=U+FE5F
    4070, // U+FE60..=U+FE6F
    4080, // U+FF00..=U+FF0F
    4081, // U+FF10..=U+FF1F
    4082, // U+FF20..=U+FF2F
    4083, // U+FF30..=U+FF3F
    4085, // U+FF50..=U+FF5F
    4086, // U+FF60..=U+FF6F
    4112, // U+10100..=U+1010F
    4153, // U+10390..=U+1039F
    4157, // U+103D0..=U+103DF
    4182, // U+10560..=U+1056F
    4229, // U+10850..=U+1085F
    4241, // U+10910..=U+1091F
    4243, // U+10930..=U+1093F
    4261, // U+10A50..=U+10A5F
    4263, // U+10A70..=U+10A7F
    4271, // U+10AF0..=U+10AFF
    4275, // U+10B30..=U+10B3F
    4281, // U+10B90..=U+10B9F
    4356, // U+11040..=U+1104F
    4363, // U+110B0..=U+110BF
    4364, // U+110C0..=U+110CF
    4372, // U+11140..=U+1114F
    4375, // U+11170..=U+1117F
    4380, // U+111C0..=U+111CF
    4387, // U+11230..=U+1123F
    4428, // U+114C0..=U+114CF
    4444, // U+115C0..=U+115CF
    4452, // U+11640..=U+1164F
    4679, // U+12470..=U+1247F
    5798, // U+16A60..=U+16A6F
    5807, // U+16AF0..=U+16AFF
    5811, // U+16B30..=U+16B3F
    5812, // U+16B40..=U+16B4F
    7113, // U+1BC90..=U+1BC9F
];
170 | ||
/// Punctuation bitmasks for the non-ASCII blocks listed in `PUNCT_TAB`.
///
/// `PUNCT_MASKS[i]` belongs to the block whose number is `PUNCT_TAB[i]`, so
/// the two tables must keep the same length and order. Within a mask, bit
/// `b` (counting from the least significant bit) is set when the code point
/// at offset `b` inside the block is punctuation.
const PUNCT_MASKS: [u16; 132] = [
    0x0882, // U+00A0..=U+00AF
    0x88c0, // U+00B0..=U+00BF
    0x4000, // U+0370..=U+037F
    0x0080, // U+0380..=U+038F
    0xfc00, // U+0550..=U+055F
    0x0600, // U+0580..=U+058F
    0x4000, // U+05B0..=U+05BF
    0x0049, // U+05C0..=U+05CF
    0x0018, // U+05F0..=U+05FF
    0x3600, // U+0600..=U+060F
    0xc800, // U+0610..=U+061F
    0x3c00, // U+0660..=U+066F
    0x0010, // U+06D0..=U+06DF
    0x3fff, // U+0700..=U+070F
    0x0380, // U+07F0..=U+07FF
    0x7fff, // U+0830..=U+083F
    0x4000, // U+0850..=U+085F
    0x0030, // U+0960..=U+096F
    0x0001, // U+0970..=U+097F
    0x0001, // U+0AF0..=U+0AFF
    0x0010, // U+0DF0..=U+0DFF
    0x8000, // U+0E40..=U+0E4F
    0x0c00, // U+0E50..=U+0E5F
    0xfff0, // U+0F00..=U+0F0F
    0x0017, // U+0F10..=U+0F1F
    0x3c00, // U+0F30..=U+0F3F
    0x0020, // U+0F80..=U+0F8F
    0x061f, // U+0FD0..=U+0FDF
    0xfc00, // U+1040..=U+104F
    0x0800, // U+10F0..=U+10FF
    0x01ff, // U+1360..=U+136F
    0x0001, // U+1400..=U+140F
    0x6000, // U+1660..=U+166F
    0x1800, // U+1690..=U+169F
    0x3800, // U+16E0..=U+16EF
    0x0060, // U+1730..=U+173F
    0x0770, // U+17D0..=U+17DF
    0x07ff, // U+1800..=U+180F
    0x0030, // U+1940..=U+194F
    0xc000, // U+1A10..=U+1A1F
    0x3f7f, // U+1AA0..=U+1AAF
    0xfc00, // U+1B50..=U+1B5F
    0x0001, // U+1B60..=U+1B6F
    0xf000, // U+1BF0..=U+1BFF
    0xf800, // U+1C30..=U+1C3F
    0xc000, // U+1C70..=U+1C7F
    0x00ff, // U+1CC0..=U+1CCF
    0x0008, // U+1CD0..=U+1CDF
    0xffff, // U+2010..=U+201F
    0x00ff, // U+2020..=U+202F
    0xffff, // U+2030..=U+203F
    0xffef, // U+2040..=U+204F
    0x7ffb, // U+2050..=U+205F
    0x6000, // U+2070..=U+207F
    0x6000, // U+2080..=U+208F
    0x0f00, // U+2300..=U+230F
    0x0600, // U+2320..=U+232F
    0xff00, // U+2760..=U+276F
    0x003f, // U+2770..=U+277F
    0x0060, // U+27C0..=U+27CF
    0xffc0, // U+27E0..=U+27EF
    0xfff8, // U+2980..=U+298F
    0x01ff, // U+2990..=U+299F
    0x0f00, // U+29D0..=U+29DF
    0x3000, // U+29F0..=U+29FF
    0xde00, // U+2CF0..=U+2CFF
    0x0001, // U+2D70..=U+2D7F
    0xffff, // U+2E00..=U+2E0F
    0xffff, // U+2E10..=U+2E1F
    0x7fff, // U+2E20..=U+2E2F
    0xffff, // U+2E30..=U+2E3F
    0x0007, // U+2E40..=U+2E4F
    0xff0e, // U+3000..=U+300F
    0xfff3, // U+3010..=U+301F
    0x2001, // U+3030..=U+303F
    0x0001, // U+30A0..=U+30AF
    0x0800, // U+30F0..=U+30FF
    0xc000, // U+A4F0..=U+A4FF
    0xe000, // U+A600..=U+A60F
    0x4008, // U+A670..=U+A67F
    0x00fc, // U+A6F0..=U+A6FF
    0x00f0, // U+A870..=U+A87F
    0xc000, // U+A8C0..=U+A8CF
    0x0700, // U+A8F0..=U+A8FF
    0xc000, // U+A920..=U+A92F
    0x8000, // U+A950..=U+A95F
    0x3ffe, // U+A9C0..=U+A9CF
    0xc000, // U+A9D0..=U+A9DF
    0xf000, // U+AA50..=U+AA5F
    0xc000, // U+AAD0..=U+AADF
    0x0003, // U+AAF0..=U+AAFF
    0x0800, // U+ABE0..=U+ABEF
    0xc000, // U+FD30..=U+FD3F
    0x03ff, // U+FE10..=U+FE1F
    0xffff, // U+FE30..=U+FE3F
    0xffff, // U+FE40..=U+FE4F
    0xfff7, // U+FE50..=U+FE5F
    0x0d0b, // U+FE60..=U+FE6F
    0xf7ee, // U+FF00..=U+FF0F
    0x8c00, // U+FF10..=U+FF1F
    0x0001, // U+FF20..=U+FF2F
    0xb800, // U+FF30..=U+FF3F
    0xa800, // U+FF50..=U+FF5F
    0x003f, // U+FF60..=U+FF6F
    0x0007, // U+10100..=U+1010F
    0x8000, // U+10390..=U+1039F
    0x0001, // U+103D0..=U+103DF
    0x8000, // U+10560..=U+1056F
    0x0080, // U+10850..=U+1085F
    0x8000, // U+10910..=U+1091F
    0x8000, // U+10930..=U+1093F
    0x01ff, // U+10A50..=U+10A5F
    0x8000, // U+10A70..=U+10A7F
    0x007f, // U+10AF0..=U+10AFF
    0xfe00, // U+10B30..=U+10B3F
    0x1e00, // U+10B90..=U+10B9F
    0x3f80, // U+11040..=U+1104F
    0xd800, // U+110B0..=U+110BF
    0x0003, // U+110C0..=U+110CF
    0x000f, // U+11140..=U+1114F
    0x0030, // U+11170..=U+1117F
    0x21e0, // U+111C0..=U+111CF
    0x3f00, // U+11230..=U+1123F
    0x0040, // U+114C0..=U+114CF
    0x03fe, // U+115C0..=U+115CF
    0x000e, // U+11640..=U+1164F
    0x001f, // U+12470..=U+1247F
    0xc000, // U+16A60..=U+16A6F
    0x0020, // U+16AF0..=U+16AFF
    0x0f80, // U+16B30..=U+16B3F
    0x0010, // U+16B40..=U+16B4F
    0x8000, // U+1BC90..=U+1BC9F
];
305 | ||
306 | pub fn is_ascii_punctuation(c: u8) -> bool { | |
307 | c < 128 && (PUNCT_MASKS_ASCII[(c / 16) as usize] & (1 << (c & 15))) != 0 | |
308 | } | |
309 | ||
310 | pub fn is_punctuation(c: char) -> bool { | |
311 | let cp = c as u32; | |
312 | if cp < 128 {return is_ascii_punctuation(cp as u8); } | |
313 | if cp > 0x1BC9F { return false; } | |
314 | let high = (cp / 16) as u16; | |
315 | match PUNCT_TAB.binary_search(&high) { | |
316 | Ok(index) => (PUNCT_MASKS[index] & (1 << (cp & 15))) != 0, | |
317 | _ => false | |
318 | } | |
319 | } | |
320 | ||
#[cfg(test)]
mod tests {
    use super::{is_ascii_punctuation, is_punctuation};

    /// Spot-checks the byte-level classifier on punctuation and
    /// non-punctuation inputs.
    #[test]
    fn test_ascii() {
        for &byte in &[b'!', b'@', b'~'] {
            assert!(
                is_ascii_punctuation(byte),
                "byte {:#04x} should be punctuation",
                byte
            );
        }
        // 0xA1 lies outside ASCII and must be rejected by the byte-level check.
        for &byte in &[b' ', b'0', b'A', 0xA1] {
            assert!(
                !is_ascii_punctuation(byte),
                "byte {:#04x} should not be punctuation",
                byte
            );
        }
    }

    /// Spot-checks the `char` classifier, including the last tabulated code
    /// point (U+1BC9F) and the first one past it.
    #[test]
    fn test_unicode() {
        let punctuation = ['~', '\u{00A1}', '\u{060C}', '\u{FF65}', '\u{1BC9F}'];
        for &ch in &punctuation {
            assert!(
                is_punctuation(ch),
                "U+{:04X} should be punctuation",
                ch as u32
            );
        }

        let not_punctuation = [' ', '\u{1BCA0}'];
        for &ch in &not_punctuation {
            assert!(
                !is_punctuation(ch),
                "U+{:04X} should not be punctuation",
                ch as u32
            );
        }
    }
}
348 |