]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*============================================================================= |
2 | Copyright (c) 2001-2011 Hartmut Kaiser | |
3 | Copyright (c) 2001-2011 Joel de Guzman | |
4 | ||
5 | Distributed under the Boost Software License, Version 1.0. (See accompanying | |
6 | file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
7 | =============================================================================*/ | |
8 | #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM) | |
9 | #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM | |
10 | ||
11 | #if defined(_MSC_VER) | |
12 | #pragma once | |
13 | #endif | |
14 | ||
15 | #include <climits> | |
16 | #include <boost/assert.hpp> | |
17 | #include <boost/cstdint.hpp> | |
18 | ||
19 | /////////////////////////////////////////////////////////////////////////////// | |
20 | // constants used to classify the single characters | |
21 | /////////////////////////////////////////////////////////////////////////////// | |
// One bit per character class, so a single table entry can carry several
// classes OR-ed together. All eight flags fit into one unsigned char.
#define BOOST_CC_DIGIT  (1 << 0)    // 0x01: decimal digit
#define BOOST_CC_XDIGIT (1 << 1)    // 0x02: hexadecimal digit
#define BOOST_CC_ALPHA  (1 << 2)    // 0x04: letter
#define BOOST_CC_CTRL   (1 << 3)    // 0x08: control character
#define BOOST_CC_LOWER  (1 << 4)    // 0x10: lower-case letter
#define BOOST_CC_UPPER  (1 << 5)    // 0x20: upper-case letter
#define BOOST_CC_SPACE  (1 << 6)    // 0x40: white space
#define BOOST_CC_PUNCT  (1 << 7)    // 0x80: punctuation
30 | ||
31 | namespace boost { namespace spirit { namespace char_encoding | |
32 | { | |
// isgraph(), isprint() and isblank() are detected programmatically rather
// than through table bits, which keeps the character type table small.
// These functions are rarely needed, and the programmatic checks are trivial.
36 | ||
37 | /////////////////////////////////////////////////////////////////////////// | |
38 | // ASCII character classification table | |
39 | /////////////////////////////////////////////////////////////////////////// | |
40 | const unsigned char ascii_char_types[] = | |
41 | { | |
42 | /* NUL 0 0 */ BOOST_CC_CTRL, | |
43 | /* SOH 1 1 */ BOOST_CC_CTRL, | |
44 | /* STX 2 2 */ BOOST_CC_CTRL, | |
45 | /* ETX 3 3 */ BOOST_CC_CTRL, | |
46 | /* EOT 4 4 */ BOOST_CC_CTRL, | |
47 | /* ENQ 5 5 */ BOOST_CC_CTRL, | |
48 | /* ACK 6 6 */ BOOST_CC_CTRL, | |
49 | /* BEL 7 7 */ BOOST_CC_CTRL, | |
50 | /* BS 8 8 */ BOOST_CC_CTRL, | |
51 | /* HT 9 9 */ BOOST_CC_CTRL|BOOST_CC_SPACE, | |
52 | /* NL 10 a */ BOOST_CC_CTRL|BOOST_CC_SPACE, | |
53 | /* VT 11 b */ BOOST_CC_CTRL|BOOST_CC_SPACE, | |
54 | /* NP 12 c */ BOOST_CC_CTRL|BOOST_CC_SPACE, | |
55 | /* CR 13 d */ BOOST_CC_CTRL|BOOST_CC_SPACE, | |
56 | /* SO 14 e */ BOOST_CC_CTRL, | |
57 | /* SI 15 f */ BOOST_CC_CTRL, | |
58 | /* DLE 16 10 */ BOOST_CC_CTRL, | |
59 | /* DC1 17 11 */ BOOST_CC_CTRL, | |
60 | /* DC2 18 12 */ BOOST_CC_CTRL, | |
61 | /* DC3 19 13 */ BOOST_CC_CTRL, | |
62 | /* DC4 20 14 */ BOOST_CC_CTRL, | |
63 | /* NAK 21 15 */ BOOST_CC_CTRL, | |
64 | /* SYN 22 16 */ BOOST_CC_CTRL, | |
65 | /* ETB 23 17 */ BOOST_CC_CTRL, | |
66 | /* CAN 24 18 */ BOOST_CC_CTRL, | |
67 | /* EM 25 19 */ BOOST_CC_CTRL, | |
68 | /* SUB 26 1a */ BOOST_CC_CTRL, | |
69 | /* ESC 27 1b */ BOOST_CC_CTRL, | |
70 | /* FS 28 1c */ BOOST_CC_CTRL, | |
71 | /* GS 29 1d */ BOOST_CC_CTRL, | |
72 | /* RS 30 1e */ BOOST_CC_CTRL, | |
73 | /* US 31 1f */ BOOST_CC_CTRL, | |
74 | /* SP 32 20 */ BOOST_CC_SPACE, | |
75 | /* ! 33 21 */ BOOST_CC_PUNCT, | |
76 | /* " 34 22 */ BOOST_CC_PUNCT, | |
77 | /* # 35 23 */ BOOST_CC_PUNCT, | |
78 | /* $ 36 24 */ BOOST_CC_PUNCT, | |
79 | /* % 37 25 */ BOOST_CC_PUNCT, | |
80 | /* & 38 26 */ BOOST_CC_PUNCT, | |
81 | /* ' 39 27 */ BOOST_CC_PUNCT, | |
82 | /* ( 40 28 */ BOOST_CC_PUNCT, | |
83 | /* ) 41 29 */ BOOST_CC_PUNCT, | |
84 | /* * 42 2a */ BOOST_CC_PUNCT, | |
85 | /* + 43 2b */ BOOST_CC_PUNCT, | |
86 | /* , 44 2c */ BOOST_CC_PUNCT, | |
87 | /* - 45 2d */ BOOST_CC_PUNCT, | |
88 | /* . 46 2e */ BOOST_CC_PUNCT, | |
89 | /* / 47 2f */ BOOST_CC_PUNCT, | |
90 | /* 0 48 30 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
91 | /* 1 49 31 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
92 | /* 2 50 32 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
93 | /* 3 51 33 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
94 | /* 4 52 34 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
95 | /* 5 53 35 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
96 | /* 6 54 36 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
97 | /* 7 55 37 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
98 | /* 8 56 38 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
99 | /* 9 57 39 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, | |
100 | /* : 58 3a */ BOOST_CC_PUNCT, | |
101 | /* ; 59 3b */ BOOST_CC_PUNCT, | |
102 | /* < 60 3c */ BOOST_CC_PUNCT, | |
103 | /* = 61 3d */ BOOST_CC_PUNCT, | |
104 | /* > 62 3e */ BOOST_CC_PUNCT, | |
105 | /* ? 63 3f */ BOOST_CC_PUNCT, | |
106 | /* @ 64 40 */ BOOST_CC_PUNCT, | |
107 | /* A 65 41 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, | |
108 | /* B 66 42 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, | |
109 | /* C 67 43 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, | |
110 | /* D 68 44 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, | |
111 | /* E 69 45 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, | |
112 | /* F 70 46 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, | |
113 | /* G 71 47 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
114 | /* H 72 48 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
115 | /* I 73 49 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
116 | /* J 74 4a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
117 | /* K 75 4b */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
118 | /* L 76 4c */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
119 | /* M 77 4d */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
120 | /* N 78 4e */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
121 | /* O 79 4f */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
122 | /* P 80 50 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
123 | /* Q 81 51 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
124 | /* R 82 52 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
125 | /* S 83 53 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
126 | /* T 84 54 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
127 | /* U 85 55 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
128 | /* V 86 56 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
129 | /* W 87 57 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
130 | /* X 88 58 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
131 | /* Y 89 59 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
132 | /* Z 90 5a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, | |
133 | /* [ 91 5b */ BOOST_CC_PUNCT, | |
134 | /* \ 92 5c */ BOOST_CC_PUNCT, | |
135 | /* ] 93 5d */ BOOST_CC_PUNCT, | |
136 | /* ^ 94 5e */ BOOST_CC_PUNCT, | |
137 | /* _ 95 5f */ BOOST_CC_PUNCT, | |
138 | /* ` 96 60 */ BOOST_CC_PUNCT, | |
139 | /* a 97 61 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, | |
140 | /* b 98 62 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, | |
141 | /* c 99 63 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, | |
142 | /* d 100 64 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, | |
143 | /* e 101 65 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, | |
144 | /* f 102 66 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, | |
145 | /* g 103 67 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
146 | /* h 104 68 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
147 | /* i 105 69 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
148 | /* j 106 6a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
149 | /* k 107 6b */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
150 | /* l 108 6c */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
151 | /* m 109 6d */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
152 | /* n 110 6e */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
153 | /* o 111 6f */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
154 | /* p 112 70 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
155 | /* q 113 71 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
156 | /* r 114 72 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
157 | /* s 115 73 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
158 | /* t 116 74 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
159 | /* u 117 75 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
160 | /* v 118 76 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
161 | /* w 119 77 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
162 | /* x 120 78 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
163 | /* y 121 79 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
164 | /* z 122 7a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, | |
165 | /* { 123 7b */ BOOST_CC_PUNCT, | |
166 | /* | 124 7c */ BOOST_CC_PUNCT, | |
167 | /* } 125 7d */ BOOST_CC_PUNCT, | |
168 | /* ~ 126 7e */ BOOST_CC_PUNCT, | |
169 | /* DEL 127 7f */ BOOST_CC_CTRL, | |
170 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
171 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
172 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
173 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
174 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
175 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
176 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
177 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
178 | }; | |
179 | ||
180 | /////////////////////////////////////////////////////////////////////////// | |
181 | // Test characters for specified conditions (using ASCII) | |
182 | /////////////////////////////////////////////////////////////////////////// | |
183 | struct ascii | |
184 | { | |
185 | typedef char char_type; | |
186 | ||
187 | static bool | |
188 | isascii_(int ch) | |
189 | { | |
190 | return 0 == (ch & ~0x7f); | |
191 | } | |
192 | ||
193 | static bool | |
194 | ischar(int ch) | |
195 | { | |
196 | return isascii_(ch); | |
197 | } | |
198 | ||
199 | static bool | |
200 | isalnum(int ch) | |
201 | { | |
202 | BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); | |
203 | return (ascii_char_types[ch] & BOOST_CC_ALPHA) | |
204 | || (ascii_char_types[ch] & BOOST_CC_DIGIT); | |
205 | } | |
206 | ||
207 | static bool | |
208 | isalpha(int ch) | |
209 | { | |
210 | BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); | |
211 | return (ascii_char_types[ch] & BOOST_CC_ALPHA) ? true : false; | |
212 | } | |
213 | ||
214 | static bool | |
215 | isdigit(int ch) | |
216 | { | |
217 | BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); | |
218 | return (ascii_char_types[ch] & BOOST_CC_DIGIT) ? true : false; | |
219 | } | |
220 | ||
221 | static bool | |
222 | isxdigit(int ch) | |
223 | { | |
224 | BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); | |
225 | return (ascii_char_types[ch] & BOOST_CC_XDIGIT) ? true : false; | |
226 | } | |
227 | ||
228 | static bool | |
229 | iscntrl(int ch) | |
230 | { | |
231 | BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); | |
232 | return (ascii_char_types[ch] & BOOST_CC_CTRL) ? true : false; | |
233 | } | |
234 | ||
235 | static bool | |
236 | isgraph(int ch) | |
237 | { | |
238 | return ('\x21' <= ch && ch <= '\x7e'); | |
239 | } | |
240 | ||
241 | static bool | |
242 | islower(int ch) | |
243 | { | |
244 | BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); | |
245 | return (ascii_char_types[ch] & BOOST_CC_LOWER) ? true : false; | |
246 | } | |
247 | ||
248 | static bool | |
249 | isprint(int ch) | |
250 | { | |
251 | return ('\x20' <= ch && ch <= '\x7e'); | |
252 | } | |
253 | ||
254 | static bool | |
255 | ispunct(int ch) | |
256 | { | |
257 | BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); | |
258 | return (ascii_char_types[ch] & BOOST_CC_PUNCT) ? true : false; | |
259 | } | |
260 | ||
261 | static bool | |
262 | isspace(int ch) | |
263 | { | |
264 | BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); | |
265 | return (ascii_char_types[ch] & BOOST_CC_SPACE) ? true : false; | |
266 | } | |
267 | ||
268 | static bool | |
269 | isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch) | |
270 | { | |
271 | return ('\x09' == ch || '\x20' == ch); | |
272 | } | |
273 | ||
274 | static bool | |
275 | isupper(int ch) | |
276 | { | |
277 | BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); | |
278 | return (ascii_char_types[ch] & BOOST_CC_UPPER) ? true : false; | |
279 | } | |
280 | ||
281 | /////////////////////////////////////////////////////////////////////// | |
282 | // Simple character conversions | |
283 | /////////////////////////////////////////////////////////////////////// | |
284 | ||
285 | static int | |
286 | tolower(int ch) | |
287 | { | |
288 | return isupper(ch) ? (ch - 'A' + 'a') : ch; | |
289 | } | |
290 | ||
291 | static int | |
292 | toupper(int ch) | |
293 | { | |
294 | return islower(ch) ? (ch - 'a' + 'A') : ch; | |
295 | } | |
296 | ||
297 | static ::boost::uint32_t | |
298 | toucs4(int ch) | |
299 | { | |
300 | return ch; | |
301 | } | |
302 | }; | |
303 | ||
304 | }}} | |
305 | ||
306 | /////////////////////////////////////////////////////////////////////////////// | |
307 | // undefine macros | |
308 | /////////////////////////////////////////////////////////////////////////////// | |
// The classification flags are implementation details of this header;
// drop them again so they do not leak into code that includes it.
// Listed in ascending bit order.
#undef BOOST_CC_DIGIT
#undef BOOST_CC_XDIGIT
#undef BOOST_CC_ALPHA
#undef BOOST_CC_CTRL
#undef BOOST_CC_LOWER
#undef BOOST_CC_UPPER
#undef BOOST_CC_SPACE
#undef BOOST_CC_PUNCT
317 | ||
318 | #endif |