]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*============================================================================= |
2 | Copyright (c) 2001-2011 Hartmut Kaiser | |
3 | Copyright (c) 2001-2011 Joel de Guzman | |
4 | ||
5 | Distributed under the Boost Software License, Version 1.0. (See accompanying | |
6 | file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
7 | =============================================================================*/ | |
8 | #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM) | |
9 | #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM | |
10 | ||
11 | #if defined(_MSC_VER) | |
12 | #pragma once | |
13 | #endif | |
14 | ||
15 | #include <climits> | |
16 | #include <boost/assert.hpp> | |
17 | #include <boost/cstdint.hpp> | |
18 | ||
19 | /////////////////////////////////////////////////////////////////////////////// | |
20 | // constants used to classify the single characters | |
21 | /////////////////////////////////////////////////////////////////////////////// | |
// Bit flags stored in ascii_char_types; a single table entry may OR several
// of them together. All eight values fit in one unsigned char.
// Decimal digits '0'-'9'.
22 | #define BOOST_CC_DIGIT 0x0001 | |
// Hexadecimal digits: '0'-'9', 'A'-'F' and 'a'-'f'.
23 | #define BOOST_CC_XDIGIT 0x0002 | |
// Letters 'A'-'Z' and 'a'-'z'.
24 | #define BOOST_CC_ALPHA 0x0004 | |
// Control characters: codes 0-31 and 127 (DEL).
25 | #define BOOST_CC_CTRL 0x0008 | |
// Lower-case letters 'a'-'z'.
26 | #define BOOST_CC_LOWER 0x0010 | |
// Upper-case letters 'A'-'Z'.
27 | #define BOOST_CC_UPPER 0x0020 | |
// White space: HT, NL, VT, NP, CR and SP.
28 | #define BOOST_CC_SPACE 0x0040 | |
// Punctuation: codes 33-47, 58-64, 91-96 and 123-126.
29 | #define BOOST_CC_PUNCT 0x0080 | |
30 | ||
31 | namespace boost { namespace spirit { namespace char_encoding | |
32 | { | |
33 | // The detection of isgraph(), isprint() and isblank() is done programmatically | |
34 | // to keep the character type table small. Additionally, these functions are | |
35 | // rather seldom used and the programmatic detection is very simple. | |
36 | ||
37 | /////////////////////////////////////////////////////////////////////////// | |
38 | // ASCII character classification table | |
39 | /////////////////////////////////////////////////////////////////////////// | |
// Table of classification flags, indexed by character code.
// Entries 0..127 are annotated with the character, its decimal code and its
// hexadecimal code, and hold the BOOST_CC_* bits for that ASCII character.
// Entries 128..255 are all zero, i.e. no classification bit is set for them.
40 | const unsigned char ascii_char_types[] = | |
41 | { | |
42 | /* NUL 0 0 */ BOOST_CC_CTRL, | |
43 | /* SOH 1 1 */ BOOST_CC_CTRL, | |
44 | /* STX 2 2 */ BOOST_CC_CTRL, | |
45 | /* ETX 3 3 */ BOOST_CC_CTRL, | |
46 | /* EOT 4 4 */ BOOST_CC_CTRL, | |
47 | /* ENQ 5 5 */ BOOST_CC_CTRL, | |
48 | /* ACK 6 6 */ BOOST_CC_CTRL, | |
49 | /* BEL 7 7 */ BOOST_CC_CTRL, | |
50 | /* BS 8 8 */ BOOST_CC_CTRL, | |
51 | /* HT 9 9 */ BOOST_CC_CTRL|BOOST_CC_SPACE, | |
52 | /* NL 10 a */ BOOST_CC_CTRL|BOOST_CC_SPACE, | |
53 | /* VT 11 b */ BOOST_CC_CTRL|BOOST_CC_SPACE, | |
54 | /* NP 12 c */ BOOST_CC_CTRL|BOOST_CC_SPACE, | |
55 | /* CR 13 d */ BOOST_CC_CTRL|BOOST_CC_SPACE, | |
56 | /* SO 14 e */ BOOST_CC_CTRL, | |
57 | /* SI 15 f */ BOOST_CC_CTRL, | |
58 | /* DLE 16 10 */ BOOST_CC_CTRL, | |
59 | /* DC1 17 11 */ BOOST_CC_CTRL, | |
60 | /* DC2 18 12 */ BOOST_CC_CTRL, | |
61 | /* DC3 19 13 */ BOOST_CC_CTRL, | |
62 | /* DC4 20 14 */ BOOST_CC_CTRL, | |
63 | /* NAK 21 15 */ BOOST_CC_CTRL, | |
64 | /* SYN 22 16 */ BOOST_CC_CTRL, | |
65 | /* ETB 23 17 */ BOOST_CC_CTRL, | |
66 | /* CAN 24 18 */ BOOST_CC_CTRL, | |
67 | /* EM 25 19 */ BOOST_CC_CTRL, | |
68 | /* SUB 26 1a */ BOOST_CC_CTRL, | |
69 | /* ESC 27 1b */ BOOST_CC_CTRL, | |
70 | /* FS 28 1c */ BOOST_CC_CTRL, | |
71 | /* GS 29 1d */ BOOST_CC_CTRL, | |
72 | /* RS 30 1e */ BOOST_CC_CTRL, | |
73 | /* US 31 1f */ BOOST_CC_CTRL, | |
74 | /* SP 32 20 */ BOOST_CC_SPACE, | |
75 | /* ! 33 21 */ BOOST_CC_PUNCT, | |
76 | /* " 34 22 */ BOOST_CC_PUNCT, | |
77 | /* # 35 23 */ BOOST_CC_PUNCT, | |
78 | /* $ 36 24 */ BOOST_CC_PUNCT, | |
79 | /* % 37 25 */ BOOST_CC_PUNCT, | |
80 | /* & 38 26 */ BOOST_CC_PUNCT, | |
81 | /* ' 39 27 */ BOOST_CC_PUNCT, | |
82 | /* ( 40 28 */ BOOST_CC_PUNCT, | |
83 | /* ) 41 29 */ BOOST_CC_PUNCT, | |
84 | /* * 42 2a */ BOOST_CC_PUNCT, | |
85 | /* + 43 2b */ BOOST_CC_PUNCT, | |
86 | /* , 44 2c */ BOOST_CC_PUNCT, | |
87 | /* - 45 2d */ BOOST_CC_PUNCT, | |
88 | /* . 46 2e */ BOOST_CC_PUNCT, | |
89 | /* / 47 2f */ BOOST_CC_PUNCT, | |
90 | /* 0 48 30 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
91 | /* 1 49 31 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
92 | /* 2 50 32 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
93 | /* 3 51 33 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
94 | /* 4 52 34 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
95 | /* 5 53 35 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
96 | /* 6 54 36 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
97 | /* 7 55 37 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
98 | /* 8 56 38 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
99 | /* 9 57 39 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
100 | /* : 58 3a */ BOOST_CC_PUNCT, | |
101 | /* ; 59 3b */ BOOST_CC_PUNCT, | |
102 | /* < 60 3c */ BOOST_CC_PUNCT, | |
103 | /* = 61 3d */ BOOST_CC_PUNCT, | |
104 | /* > 62 3e */ BOOST_CC_PUNCT, | |
105 | /* ? 63 3f */ BOOST_CC_PUNCT, | |
106 | /* @ 64 40 */ BOOST_CC_PUNCT, | |
107 | /* A 65 41 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, | |
108 | /* B 66 42 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, | |
109 | /* C 67 43 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, | |
110 | /* D 68 44 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, | |
111 | /* E 69 45 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, | |
112 | /* F 70 46 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, | |
113 | /* G 71 47 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
114 | /* H 72 48 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
115 | /* I 73 49 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
116 | /* J 74 4a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
117 | /* K 75 4b */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
118 | /* L 76 4c */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
119 | /* M 77 4d */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
120 | /* N 78 4e */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
121 | /* O 79 4f */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
122 | /* P 80 50 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
123 | /* Q 81 51 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
124 | /* R 82 52 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
125 | /* S 83 53 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
126 | /* T 84 54 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
127 | /* U 85 55 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
128 | /* V 86 56 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
129 | /* W 87 57 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
130 | /* X 88 58 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
131 | /* Y 89 59 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
132 | /* Z 90 5a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
133 | /* [ 91 5b */ BOOST_CC_PUNCT, | |
134 | /* \ 92 5c */ BOOST_CC_PUNCT, | |
135 | /* ] 93 5d */ BOOST_CC_PUNCT, | |
136 | /* ^ 94 5e */ BOOST_CC_PUNCT, | |
137 | /* _ 95 5f */ BOOST_CC_PUNCT, | |
138 | /* ` 96 60 */ BOOST_CC_PUNCT, | |
139 | /* a 97 61 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, | |
140 | /* b 98 62 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, | |
141 | /* c 99 63 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, | |
142 | /* d 100 64 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, | |
143 | /* e 101 65 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, | |
144 | /* f 102 66 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, | |
145 | /* g 103 67 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
146 | /* h 104 68 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
147 | /* i 105 69 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
148 | /* j 106 6a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
149 | /* k 107 6b */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
150 | /* l 108 6c */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
151 | /* m 109 6d */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
152 | /* n 110 6e */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
153 | /* o 111 6f */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
154 | /* p 112 70 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
155 | /* q 113 71 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
156 | /* r 114 72 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
157 | /* s 115 73 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
158 | /* t 116 74 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
159 | /* u 117 75 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
160 | /* v 118 76 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
161 | /* w 119 77 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
162 | /* x 120 78 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
163 | /* y 121 79 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
164 | /* z 122 7a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
165 | /* { 123 7b */ BOOST_CC_PUNCT, | |
166 | /* | 124 7c */ BOOST_CC_PUNCT, | |
167 | /* } 125 7d */ BOOST_CC_PUNCT, | |
168 | /* ~ 126 7e */ BOOST_CC_PUNCT, | |
169 | /* DEL 127 7f */ BOOST_CC_CTRL, | |
// Codes 128..255 (8 rows of 16 entries): no classification bits set.
170 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
171 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
172 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
173 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
174 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
175 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
176 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
177 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
178 | }; | |
179 | ||
180 | /////////////////////////////////////////////////////////////////////////// | |
181 | // Test characters for specified conditions (using ASCII) | |
182 | /////////////////////////////////////////////////////////////////////////// | |
183 | struct ascii | |
184 | { | |
185 | typedef char char_type; | |
92f5a8d4 | 186 | typedef unsigned char classify_type; |
7c673cae FG |
187 | |
188 | static bool | |
189 | isascii_(int ch) | |
190 | { | |
191 | return 0 == (ch & ~0x7f); | |
192 | } | |
193 | ||
194 | static bool | |
195 | ischar(int ch) | |
196 | { | |
197 | return isascii_(ch); | |
198 | } | |
199 | ||
92f5a8d4 TL |
200 | // *** Note on assertions: The precondition is that the calls to |
201 | // these functions do not violate the required range of ch (type int) | |
202 | // which is that strict_ischar(ch) should be true. It is the | |
203 | // responsibility of the caller to make sure this precondition is not | |
204 | // violated. | |
205 | ||
206 | static bool | |
207 | strict_ischar(int ch) | |
208 | { | |
209 | return ch >= 0 && ch <= 127; | |
210 | } | |
211 | ||
7c673cae FG |
212 | static bool |
213 | isalnum(int ch) | |
214 | { | |
92f5a8d4 | 215 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
216 | return (ascii_char_types[ch] & BOOST_CC_ALPHA) |
217 | || (ascii_char_types[ch] & BOOST_CC_DIGIT); | |
218 | } | |
219 | ||
220 | static bool | |
221 | isalpha(int ch) | |
222 | { | |
92f5a8d4 | 223 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
224 | return (ascii_char_types[ch] & BOOST_CC_ALPHA) ? true : false; |
225 | } | |
226 | ||
227 | static bool | |
228 | isdigit(int ch) | |
229 | { | |
92f5a8d4 | 230 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
231 | return (ascii_char_types[ch] & BOOST_CC_DIGIT) ? true : false; |
232 | } | |
233 | ||
234 | static bool | |
235 | isxdigit(int ch) | |
236 | { | |
92f5a8d4 | 237 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
238 | return (ascii_char_types[ch] & BOOST_CC_XDIGIT) ? true : false; |
239 | } | |
240 | ||
241 | static bool | |
242 | iscntrl(int ch) | |
243 | { | |
92f5a8d4 | 244 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
245 | return (ascii_char_types[ch] & BOOST_CC_CTRL) ? true : false; |
246 | } | |
247 | ||
248 | static bool | |
249 | isgraph(int ch) | |
250 | { | |
92f5a8d4 | 251 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
252 | return ('\x21' <= ch && ch <= '\x7e'); |
253 | } | |
254 | ||
255 | static bool | |
256 | islower(int ch) | |
257 | { | |
92f5a8d4 | 258 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
259 | return (ascii_char_types[ch] & BOOST_CC_LOWER) ? true : false; |
260 | } | |
261 | ||
262 | static bool | |
263 | isprint(int ch) | |
264 | { | |
92f5a8d4 | 265 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
266 | return ('\x20' <= ch && ch <= '\x7e'); |
267 | } | |
268 | ||
269 | static bool | |
270 | ispunct(int ch) | |
271 | { | |
92f5a8d4 | 272 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
273 | return (ascii_char_types[ch] & BOOST_CC_PUNCT) ? true : false; |
274 | } | |
275 | ||
276 | static bool | |
277 | isspace(int ch) | |
278 | { | |
92f5a8d4 | 279 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
280 | return (ascii_char_types[ch] & BOOST_CC_SPACE) ? true : false; |
281 | } | |
282 | ||
283 | static bool | |
284 | isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch) | |
285 | { | |
92f5a8d4 | 286 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
287 | return ('\x09' == ch || '\x20' == ch); |
288 | } | |
289 | ||
290 | static bool | |
291 | isupper(int ch) | |
292 | { | |
92f5a8d4 | 293 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
294 | return (ascii_char_types[ch] & BOOST_CC_UPPER) ? true : false; |
295 | } | |
296 | ||
297 | /////////////////////////////////////////////////////////////////////// | |
298 | // Simple character conversions | |
299 | /////////////////////////////////////////////////////////////////////// | |
300 | ||
301 | static int | |
302 | tolower(int ch) | |
303 | { | |
92f5a8d4 | 304 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
305 | return isupper(ch) ? (ch - 'A' + 'a') : ch; |
306 | } | |
307 | ||
308 | static int | |
309 | toupper(int ch) | |
310 | { | |
92f5a8d4 | 311 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
312 | return islower(ch) ? (ch - 'a' + 'A') : ch; |
313 | } | |
314 | ||
315 | static ::boost::uint32_t | |
316 | toucs4(int ch) | |
317 | { | |
92f5a8d4 | 318 | BOOST_ASSERT(strict_ischar(ch)); |
7c673cae FG |
319 | return ch; |
320 | } | |
321 | }; | |
322 | ||
323 | }}} | |
324 | ||
325 | /////////////////////////////////////////////////////////////////////////////// | |
326 | // undefine macros | |
327 | /////////////////////////////////////////////////////////////////////////////// | |
// The BOOST_CC_* flags are only needed while the classification table and the
// ascii struct earlier in this header are being defined; removing them here
// keeps them from staying defined once this header has been processed.
328 | #undef BOOST_CC_DIGIT | |
329 | #undef BOOST_CC_XDIGIT | |
330 | #undef BOOST_CC_ALPHA | |
331 | #undef BOOST_CC_CTRL | |
332 | #undef BOOST_CC_LOWER | |
333 | #undef BOOST_CC_UPPER | |
334 | #undef BOOST_CC_PUNCT | |
335 | #undef BOOST_CC_SPACE | |
336 | ||
337 | #endif |