1 /*=============================================================================
2 Copyright (c) 2001-2011 Hartmut Kaiser
3 Copyright (c) 2001-2011 Joel de Guzman
5 Distributed under the Boost Software License, Version 1.0. (See accompanying
6 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 =============================================================================*/
8 #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM)
9 #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM
16 #include <boost/assert.hpp>
17 #include <boost/cstdint.hpp>
19 ///////////////////////////////////////////////////////////////////////////////
20 // constants used to classify the single characters
21 ///////////////////////////////////////////////////////////////////////////////
// Bit flags stored per character in ascii_char_types. Every flag occupies its
// own bit, so a table entry can combine several of them with `|` and a single
// class can be tested with `&`. The largest value is 0x80, so any combination
// fits into the table's unsigned char element type.
// Decimal digits '0'-'9'.
22 #define BOOST_CC_DIGIT 0x0001
// Hexadecimal digits: '0'-'9', 'A'-'F' and 'a'-'f'.
23 #define BOOST_CC_XDIGIT 0x0002
// Letters 'A'-'Z' and 'a'-'z'.
24 #define BOOST_CC_ALPHA 0x0004
// Control codes 0-31 and DEL (127).
25 #define BOOST_CC_CTRL 0x0008
// Lower case letters 'a'-'z'.
26 #define BOOST_CC_LOWER 0x0010
// Upper case letters 'A'-'Z'.
27 #define BOOST_CC_UPPER 0x0020
// White space: HT, NL, VT, NP, CR and the space character.
28 #define BOOST_CC_SPACE 0x0040
// Punctuation: the printable characters that are neither letters, digits nor
// the space character (codes 33-47, 58-64, 91-96 and 123-126).
29 #define BOOST_CC_PUNCT 0x0080
31 namespace boost { namespace spirit { namespace char_encoding
33 // The detection of isgraph(), isprint() and isblank() is done programmatically
34 // to keep the character type table small. Additionally, these functions are
35 // rather seldom used and the programmatic detection is very simple.
37 ///////////////////////////////////////////////////////////////////////////
38 // ASCII character classification table
39 ///////////////////////////////////////////////////////////////////////////
// Lookup table indexed by character code. It lists 256 entries: entries 0-127
// hold the BOOST_CC_* flags of the corresponding ASCII character (name,
// decimal and hexadecimal code are given in the comment in front of each
// entry), entries 128-255 are all zero.
// Some characters belong to several classes at once: HT, NL, VT, NP and CR are
// both control and space characters, decimal digits are also hex digits, and
// the letters A-F / a-f are additionally flagged as hex digits.
40 const unsigned char ascii_char_types[] =
42 /* NUL 0 0 */ BOOST_CC_CTRL,
43 /* SOH 1 1 */ BOOST_CC_CTRL,
44 /* STX 2 2 */ BOOST_CC_CTRL,
45 /* ETX 3 3 */ BOOST_CC_CTRL,
46 /* EOT 4 4 */ BOOST_CC_CTRL,
47 /* ENQ 5 5 */ BOOST_CC_CTRL,
48 /* ACK 6 6 */ BOOST_CC_CTRL,
49 /* BEL 7 7 */ BOOST_CC_CTRL,
50 /* BS 8 8 */ BOOST_CC_CTRL,
51 /* HT 9 9 */ BOOST_CC_CTRL|BOOST_CC_SPACE,
52 /* NL 10 a */ BOOST_CC_CTRL|BOOST_CC_SPACE,
53 /* VT 11 b */ BOOST_CC_CTRL|BOOST_CC_SPACE,
54 /* NP 12 c */ BOOST_CC_CTRL|BOOST_CC_SPACE,
55 /* CR 13 d */ BOOST_CC_CTRL|BOOST_CC_SPACE,
56 /* SO 14 e */ BOOST_CC_CTRL,
57 /* SI 15 f */ BOOST_CC_CTRL,
58 /* DLE 16 10 */ BOOST_CC_CTRL,
59 /* DC1 17 11 */ BOOST_CC_CTRL,
60 /* DC2 18 12 */ BOOST_CC_CTRL,
61 /* DC3 19 13 */ BOOST_CC_CTRL,
62 /* DC4 20 14 */ BOOST_CC_CTRL,
63 /* NAK 21 15 */ BOOST_CC_CTRL,
64 /* SYN 22 16 */ BOOST_CC_CTRL,
65 /* ETB 23 17 */ BOOST_CC_CTRL,
66 /* CAN 24 18 */ BOOST_CC_CTRL,
67 /* EM 25 19 */ BOOST_CC_CTRL,
68 /* SUB 26 1a */ BOOST_CC_CTRL,
69 /* ESC 27 1b */ BOOST_CC_CTRL,
70 /* FS 28 1c */ BOOST_CC_CTRL,
71 /* GS 29 1d */ BOOST_CC_CTRL,
72 /* RS 30 1e */ BOOST_CC_CTRL,
73 /* US 31 1f */ BOOST_CC_CTRL,
74 /* SP 32 20 */ BOOST_CC_SPACE,
75 /* ! 33 21 */ BOOST_CC_PUNCT,
76 /* " 34 22 */ BOOST_CC_PUNCT,
77 /* # 35 23 */ BOOST_CC_PUNCT,
78 /* $ 36 24 */ BOOST_CC_PUNCT,
79 /* % 37 25 */ BOOST_CC_PUNCT,
80 /* & 38 26 */ BOOST_CC_PUNCT,
81 /* ' 39 27 */ BOOST_CC_PUNCT,
82 /* ( 40 28 */ BOOST_CC_PUNCT,
83 /* ) 41 29 */ BOOST_CC_PUNCT,
84 /* * 42 2a */ BOOST_CC_PUNCT,
85 /* + 43 2b */ BOOST_CC_PUNCT,
86 /* , 44 2c */ BOOST_CC_PUNCT,
87 /* - 45 2d */ BOOST_CC_PUNCT,
88 /* . 46 2e */ BOOST_CC_PUNCT,
89 /* / 47 2f */ BOOST_CC_PUNCT,
90 /* 0 48 30 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
91 /* 1 49 31 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
92 /* 2 50 32 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
93 /* 3 51 33 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
94 /* 4 52 34 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
95 /* 5 53 35 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
96 /* 6 54 36 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
97 /* 7 55 37 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
98 /* 8 56 38 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
99 /* 9 57 39 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
100 /* : 58 3a */ BOOST_CC_PUNCT,
101 /* ; 59 3b */ BOOST_CC_PUNCT,
102 /* < 60 3c */ BOOST_CC_PUNCT,
103 /* = 61 3d */ BOOST_CC_PUNCT,
104 /* > 62 3e */ BOOST_CC_PUNCT,
105 /* ? 63 3f */ BOOST_CC_PUNCT,
106 /* @ 64 40 */ BOOST_CC_PUNCT,
107 /* A 65 41 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
108 /* B 66 42 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
109 /* C 67 43 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
110 /* D 68 44 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
111 /* E 69 45 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
112 /* F 70 46 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
113 /* G 71 47 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
114 /* H 72 48 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
115 /* I 73 49 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
116 /* J 74 4a */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
117 /* K 75 4b */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
118 /* L 76 4c */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
119 /* M 77 4d */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
120 /* N 78 4e */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
121 /* O 79 4f */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
122 /* P 80 50 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
123 /* Q 81 51 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
124 /* R 82 52 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
125 /* S 83 53 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
126 /* T 84 54 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
127 /* U 85 55 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
128 /* V 86 56 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
129 /* W 87 57 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
130 /* X 88 58 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
131 /* Y 89 59 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
132 /* Z 90 5a */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
133 /* [ 91 5b */ BOOST_CC_PUNCT,
134 /* \ 92 5c */ BOOST_CC_PUNCT,
135 /* ] 93 5d */ BOOST_CC_PUNCT,
136 /* ^ 94 5e */ BOOST_CC_PUNCT,
137 /* _ 95 5f */ BOOST_CC_PUNCT,
138 /* ` 96 60 */ BOOST_CC_PUNCT,
139 /* a 97 61 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
140 /* b 98 62 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
141 /* c 99 63 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
142 /* d 100 64 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
143 /* e 101 65 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
144 /* f 102 66 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
145 /* g 103 67 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
146 /* h 104 68 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
147 /* i 105 69 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
148 /* j 106 6a */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
149 /* k 107 6b */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
150 /* l 108 6c */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
151 /* m 109 6d */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
152 /* n 110 6e */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
153 /* o 111 6f */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
154 /* p 112 70 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
155 /* q 113 71 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
156 /* r 114 72 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
157 /* s 115 73 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
158 /* t 116 74 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
159 /* u 117 75 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
160 /* v 118 76 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
161 /* w 119 77 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
162 /* x 120 78 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
163 /* y 121 79 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
164 /* z 122 7a */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
165 /* { 123 7b */ BOOST_CC_PUNCT,
166 /* | 124 7c */ BOOST_CC_PUNCT,
167 /* } 125 7d */ BOOST_CC_PUNCT,
168 /* ~ 126 7e */ BOOST_CC_PUNCT,
169 /* DEL 127 7f */ BOOST_CC_CTRL,
// Codes 128-255 (8 rows of 16 entries): outside 7-bit ASCII, no flags set, so
// every table-based classification test yields false for them.
170 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
171 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
172 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 ///////////////////////////////////////////////////////////////////////////
181 // Test characters for specified conditions (using ASCII)
182 ///////////////////////////////////////////////////////////////////////////
// NOTE(review): only fragments of the member functions are visible in this
// view (most signatures and all braces are missing), so the comments below
// describe each visible statement rather than a named function.

// Character type handled by this encoding.
185 typedef char char_type;
// True when ch has no bits set outside the 7-bit range 0x00-0x7f.
190 return 0 == (ch & ~0x7f);
// Every table-based test below first asserts that ch has no bits set above
// UCHAR_MAX; this also rejects negative values and keeps ch usable as an index
// into ascii_char_types.
// NOTE(review): UCHAR_MAX is normally supplied by <climits>; no such include
// is visible in this view - confirm.
// Alphanumeric test: true when the ALPHA or the DIGIT flag is set.
202 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
203 return (ascii_char_types[ch] & BOOST_CC_ALPHA)
204 || (ascii_char_types[ch] & BOOST_CC_DIGIT);
// Letter test (ALPHA flag). The `? true : false` form used from here on turns
// the masked integer into an explicit boolean value.
210 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
211 return (ascii_char_types[ch] & BOOST_CC_ALPHA) ? true : false;
// Decimal digit test (DIGIT flag).
217 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
218 return (ascii_char_types[ch] & BOOST_CC_DIGIT) ? true : false;
// Hexadecimal digit test (XDIGIT flag).
224 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
225 return (ascii_char_types[ch] & BOOST_CC_XDIGIT) ? true : false;
// Control character test (CTRL flag).
231 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
232 return (ascii_char_types[ch] & BOOST_CC_CTRL) ? true : false;
// Range check instead of a table lookup: true for codes 0x21 ('!') through
// 0x7e ('~'), i.e. the printable characters without the space character.
238 return ('\x21' <= ch && ch <= '\x7e');
// Lower case letter test (LOWER flag).
244 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
245 return (ascii_char_types[ch] & BOOST_CC_LOWER) ? true : false;
// Range check: true for codes 0x20 (space) through 0x7e ('~').
251 return ('\x20' <= ch && ch <= '\x7e');
// Punctuation test (PUNCT flag).
257 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
258 return (ascii_char_types[ch] & BOOST_CC_PUNCT) ? true : false;
// White space test (SPACE flag).
264 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
265 return (ascii_char_types[ch] & BOOST_CC_SPACE) ? true : false;
// isblank: true only for horizontal tab (0x09) and the space character (0x20).
// BOOST_PREVENT_MACRO_SUBSTITUTION sits between the name and the parameter
// list; presumably it guards against a function-like isblank macro - confirm
// against the Boost.Config documentation.
269 isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch)
271 return ('\x09' == ch || '\x20' == ch);
// Upper case letter test (UPPER flag).
277 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
278 return (ascii_char_types[ch] & BOOST_CC_UPPER) ? true : false;
281 ///////////////////////////////////////////////////////////////////////
282 // Simple character conversions
283 ///////////////////////////////////////////////////////////////////////
// Upper -> lower: shifts ch by the distance between 'A' and 'a' when isupper
// accepts it and returns ch unchanged otherwise.
// NOTE(review): isupper/islower presumably resolve to the table-based tests
// above rather than the <cctype> functions - confirm.
288 return isupper(ch) ? (ch - 'A' + 'a') : ch;
// Lower -> upper: the inverse shift, applied only when islower accepts ch.
294 return islower(ch) ? (ch - 'a' + 'A') : ch;
297 static ::boost::uint32_t
306 ///////////////////////////////////////////////////////////////////////////////
308 ///////////////////////////////////////////////////////////////////////////////
// Remove the classification macros again so that they do not leak into code
// that includes this header.
// NOTE(review): BOOST_CC_CTRL is #defined together with the other flags but no
// matching #undef is visible in this view - confirm it is undefined as well.
309 #undef BOOST_CC_DIGIT
310 #undef BOOST_CC_XDIGIT
311 #undef BOOST_CC_ALPHA
313 #undef BOOST_CC_LOWER
314 #undef BOOST_CC_UPPER
315 #undef BOOST_CC_PUNCT
316 #undef BOOST_CC_SPACE