1 /*=============================================================================
2 Copyright (c) 2001-2011 Hartmut Kaiser
3 Copyright (c) 2001-2011 Joel de Guzman
5 Distributed under the Boost Software License, Version 1.0. (See accompanying
6 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 =============================================================================*/
8 #if !defined(BOOST_SPIRIT_UNICODE_1_JANUARY_12_2010_0728PM)
9 #define BOOST_SPIRIT_UNICODE_1_JANUARY_12_2010_0728PM
15 #include <boost/cstdint.hpp>
16 #include <boost/spirit/home/support/char_encoding/unicode/query.hpp>
18 namespace boost { namespace spirit { namespace char_encoding
20 ///////////////////////////////////////////////////////////////////////////
21 // Test characters for specified conditions (using Unicode code points)
22 ///////////////////////////////////////////////////////////////////////////
// char_type is the character representation accepted by every predicate and
// conversion function in this file.
25 #ifdef BOOST_NO_CXX11_CHAR32_T
// Selected when BOOST_NO_CXX11_CHAR32_T is defined: ::boost::uint32_t.
26 typedef ::boost::uint32_t char_type;
// NOTE(review): presumably the alternative for compilers that do provide
// char32_t; the #else/#endif lines are not visible in this excerpt - confirm.
28 typedef char32_t char_type;
// NOTE(review): classify_type is not referenced by any code visible in this
// excerpt; confirm its intended use against the callers.
30 typedef ::boost::uint32_t classify_type;
32 ///////////////////////////////////////////////////////////////////////////
34 ///////////////////////////////////////////////////////////////////////////
// ASCII test: yields true when ch has no bit set outside the low seven bits,
// i.e. when its value lies in 0x00..0x7f.
36 isascii_(char_type ch)
38 return 0 == (ch & ~0x7f);
44 // Unicode code points lie in the range 0x00 to 0x10FFFF
45 return ch <= 0x10FFFF;
51 return ucd::is_alphanumeric(ch);
57 return ucd::is_alphabetic(ch);
63 return ucd::is_decimal_number(ch);
// Hexadecimal-digit test: forwards ch to ucd::is_hex_digit and returns
// whatever that query reports.
67 isxdigit(char_type ch)
69 return ucd::is_hex_digit(ch);
75 return ucd::is_control(ch);
81 return ucd::is_graph(ch);
87 return ucd::is_lowercase(ch);
93 return ucd::is_print(ch);
99 return ucd::is_punctuation(ch);
// White-space test: forwards ch to ucd::is_white_space and returns whatever
// that query reports.
103 isspace(char_type ch)
105 return ucd::is_white_space(ch);
// Blank-character test: forwards ch to ucd::is_blank and returns whatever
// that query reports.
// NOTE(review): BOOST_PREVENT_MACRO_SUBSTITUTION sits between the name and
// the parameter list, presumably so that a function-like `isblank` macro
// from a C library header cannot expand here - confirm against Boost.Config.
109 isblank BOOST_PREVENT_MACRO_SUBSTITUTION (char_type ch)
111 return ucd::is_blank(ch);
// Uppercase test: forwards ch to ucd::is_uppercase and returns whatever that
// query reports.
115 isupper(char_type ch)
117 return ucd::is_uppercase(ch);
120 ///////////////////////////////////////////////////////////////////////////
121 // Simple character conversions
122 ///////////////////////////////////////////////////////////////////////////
// Lowercase conversion: returns the result of ucd::to_lowercase(ch).
125 tolower(char_type ch)
127 return ucd::to_lowercase(ch);
// Uppercase conversion: returns the result of ucd::to_uppercase(ch).
131 toupper(char_type ch)
133 return ucd::to_uppercase(ch);
136 static ::boost::uint32_t
142 ///////////////////////////////////////////////////////////////////////////
// Major Categories
144 ///////////////////////////////////////////////////////////////////////////
// BOOST_SPIRIT_MAJOR_CATEGORY(name) stamps out a predicate is_<name>(char_type ch)
// that compares ucd::get_major_category(ch) with ucd::properties::<name>.
// It is instantiated below for: letter, mark, number, separator, other,
// punctuation and symbol.
145 #define BOOST_SPIRIT_MAJOR_CATEGORY(name) \
147 is_##name(char_type ch) \
149 return ucd::get_major_category(ch) == ucd::properties::name; \
153 BOOST_SPIRIT_MAJOR_CATEGORY(letter)
154 BOOST_SPIRIT_MAJOR_CATEGORY(mark)
155 BOOST_SPIRIT_MAJOR_CATEGORY(number)
156 BOOST_SPIRIT_MAJOR_CATEGORY(separator)
157 BOOST_SPIRIT_MAJOR_CATEGORY(other)
158 BOOST_SPIRIT_MAJOR_CATEGORY(punctuation)
159 BOOST_SPIRIT_MAJOR_CATEGORY(symbol)
161 ///////////////////////////////////////////////////////////////////////////
162 // General Categories
163 ///////////////////////////////////////////////////////////////////////////
// BOOST_SPIRIT_CATEGORY(name) stamps out a predicate is_<name>(char_type ch)
// that compares ucd::get_category(ch) with ucd::properties::<name>.
// The instantiations below start with the letter categories and continue
// with marks, numbers, separators, the remaining "other" categories,
// punctuation and symbols.
164 #define BOOST_SPIRIT_CATEGORY(name) \
166 is_##name(char_type ch) \
168 return ucd::get_category(ch) == ucd::properties::name; \
172 BOOST_SPIRIT_CATEGORY(uppercase_letter)
173 BOOST_SPIRIT_CATEGORY(lowercase_letter)
174 BOOST_SPIRIT_CATEGORY(titlecase_letter)
175 BOOST_SPIRIT_CATEGORY(modifier_letter)
176 BOOST_SPIRIT_CATEGORY(other_letter)
// Marks
178 BOOST_SPIRIT_CATEGORY(nonspacing_mark)
179 BOOST_SPIRIT_CATEGORY(enclosing_mark)
180 BOOST_SPIRIT_CATEGORY(spacing_mark)
// Numbers
182 BOOST_SPIRIT_CATEGORY(decimal_number)
183 BOOST_SPIRIT_CATEGORY(letter_number)
184 BOOST_SPIRIT_CATEGORY(other_number)
// Separators
186 BOOST_SPIRIT_CATEGORY(space_separator)
187 BOOST_SPIRIT_CATEGORY(line_separator)
188 BOOST_SPIRIT_CATEGORY(paragraph_separator)
// Control, format, private-use, surrogate and unassigned code points
190 BOOST_SPIRIT_CATEGORY(control)
191 BOOST_SPIRIT_CATEGORY(format)
192 BOOST_SPIRIT_CATEGORY(private_use)
193 BOOST_SPIRIT_CATEGORY(surrogate)
194 BOOST_SPIRIT_CATEGORY(unassigned)
// Punctuation
196 BOOST_SPIRIT_CATEGORY(dash_punctuation)
197 BOOST_SPIRIT_CATEGORY(open_punctuation)
198 BOOST_SPIRIT_CATEGORY(close_punctuation)
199 BOOST_SPIRIT_CATEGORY(connector_punctuation)
200 BOOST_SPIRIT_CATEGORY(other_punctuation)
201 BOOST_SPIRIT_CATEGORY(initial_punctuation)
202 BOOST_SPIRIT_CATEGORY(final_punctuation)
// Symbols
204 BOOST_SPIRIT_CATEGORY(math_symbol)
205 BOOST_SPIRIT_CATEGORY(currency_symbol)
206 BOOST_SPIRIT_CATEGORY(modifier_symbol)
207 BOOST_SPIRIT_CATEGORY(other_symbol)
209 ///////////////////////////////////////////////////////////////////////////
210 // Derived Categories
211 ///////////////////////////////////////////////////////////////////////////
// BOOST_SPIRIT_DERIVED_CATEGORY(name) stamps out a wrapper is_<name>(char_type ch)
// that forwards to the identically named query ucd::is_<name>(ch) and returns
// its result. Unlike the category macros above there is no comparison against
// a ucd::properties value.
212 #define BOOST_SPIRIT_DERIVED_CATEGORY(name) \
214 is_##name(char_type ch) \
216 return ucd::is_##name(ch); \
220 BOOST_SPIRIT_DERIVED_CATEGORY(alphabetic)
221 BOOST_SPIRIT_DERIVED_CATEGORY(uppercase)
222 BOOST_SPIRIT_DERIVED_CATEGORY(lowercase)
223 BOOST_SPIRIT_DERIVED_CATEGORY(white_space)
224 BOOST_SPIRIT_DERIVED_CATEGORY(hex_digit)
225 BOOST_SPIRIT_DERIVED_CATEGORY(noncharacter_code_point)
226 BOOST_SPIRIT_DERIVED_CATEGORY(default_ignorable_code_point)
228 ///////////////////////////////////////////////////////////////////////////
// Scripts
230 ///////////////////////////////////////////////////////////////////////////
// BOOST_SPIRIT_SCRIPT(name) stamps out a predicate is_<name>(char_type ch)
// that compares ucd::get_script(ch) with ucd::properties::<name>.
// One predicate is instantiated below for each script name listed, ending
// with the inherited, common and unknown values.
231 #define BOOST_SPIRIT_SCRIPT(name) \
233 is_##name(char_type ch) \
235 return ucd::get_script(ch) == ucd::properties::name; \
239 BOOST_SPIRIT_SCRIPT(arabic)
240 BOOST_SPIRIT_SCRIPT(imperial_aramaic)
241 BOOST_SPIRIT_SCRIPT(armenian)
242 BOOST_SPIRIT_SCRIPT(avestan)
243 BOOST_SPIRIT_SCRIPT(balinese)
244 BOOST_SPIRIT_SCRIPT(bamum)
245 BOOST_SPIRIT_SCRIPT(bengali)
246 BOOST_SPIRIT_SCRIPT(bopomofo)
247 BOOST_SPIRIT_SCRIPT(braille)
248 BOOST_SPIRIT_SCRIPT(buginese)
249 BOOST_SPIRIT_SCRIPT(buhid)
250 BOOST_SPIRIT_SCRIPT(canadian_aboriginal)
251 BOOST_SPIRIT_SCRIPT(carian)
252 BOOST_SPIRIT_SCRIPT(cham)
253 BOOST_SPIRIT_SCRIPT(cherokee)
254 BOOST_SPIRIT_SCRIPT(coptic)
255 BOOST_SPIRIT_SCRIPT(cypriot)
256 BOOST_SPIRIT_SCRIPT(cyrillic)
257 BOOST_SPIRIT_SCRIPT(devanagari)
258 BOOST_SPIRIT_SCRIPT(deseret)
259 BOOST_SPIRIT_SCRIPT(egyptian_hieroglyphs)
260 BOOST_SPIRIT_SCRIPT(ethiopic)
261 BOOST_SPIRIT_SCRIPT(georgian)
262 BOOST_SPIRIT_SCRIPT(glagolitic)
263 BOOST_SPIRIT_SCRIPT(gothic)
264 BOOST_SPIRIT_SCRIPT(greek)
265 BOOST_SPIRIT_SCRIPT(gujarati)
266 BOOST_SPIRIT_SCRIPT(gurmukhi)
267 BOOST_SPIRIT_SCRIPT(hangul)
268 BOOST_SPIRIT_SCRIPT(han)
269 BOOST_SPIRIT_SCRIPT(hanunoo)
270 BOOST_SPIRIT_SCRIPT(hebrew)
271 BOOST_SPIRIT_SCRIPT(hiragana)
272 BOOST_SPIRIT_SCRIPT(katakana_or_hiragana)
273 BOOST_SPIRIT_SCRIPT(old_italic)
274 BOOST_SPIRIT_SCRIPT(javanese)
275 BOOST_SPIRIT_SCRIPT(kayah_li)
276 BOOST_SPIRIT_SCRIPT(katakana)
277 BOOST_SPIRIT_SCRIPT(kharoshthi)
278 BOOST_SPIRIT_SCRIPT(khmer)
279 BOOST_SPIRIT_SCRIPT(kannada)
280 BOOST_SPIRIT_SCRIPT(kaithi)
281 BOOST_SPIRIT_SCRIPT(tai_tham)
282 BOOST_SPIRIT_SCRIPT(lao)
283 BOOST_SPIRIT_SCRIPT(latin)
284 BOOST_SPIRIT_SCRIPT(lepcha)
285 BOOST_SPIRIT_SCRIPT(limbu)
286 BOOST_SPIRIT_SCRIPT(linear_b)
287 BOOST_SPIRIT_SCRIPT(lisu)
288 BOOST_SPIRIT_SCRIPT(lycian)
289 BOOST_SPIRIT_SCRIPT(lydian)
290 BOOST_SPIRIT_SCRIPT(malayalam)
291 BOOST_SPIRIT_SCRIPT(mongolian)
292 BOOST_SPIRIT_SCRIPT(meetei_mayek)
293 BOOST_SPIRIT_SCRIPT(myanmar)
294 BOOST_SPIRIT_SCRIPT(nko)
295 BOOST_SPIRIT_SCRIPT(ogham)
296 BOOST_SPIRIT_SCRIPT(ol_chiki)
297 BOOST_SPIRIT_SCRIPT(old_turkic)
298 BOOST_SPIRIT_SCRIPT(oriya)
299 BOOST_SPIRIT_SCRIPT(osmanya)
300 BOOST_SPIRIT_SCRIPT(phags_pa)
301 BOOST_SPIRIT_SCRIPT(inscriptional_pahlavi)
302 BOOST_SPIRIT_SCRIPT(phoenician)
303 BOOST_SPIRIT_SCRIPT(inscriptional_parthian)
304 BOOST_SPIRIT_SCRIPT(rejang)
305 BOOST_SPIRIT_SCRIPT(runic)
306 BOOST_SPIRIT_SCRIPT(samaritan)
307 BOOST_SPIRIT_SCRIPT(old_south_arabian)
308 BOOST_SPIRIT_SCRIPT(saurashtra)
309 BOOST_SPIRIT_SCRIPT(shavian)
310 BOOST_SPIRIT_SCRIPT(sinhala)
311 BOOST_SPIRIT_SCRIPT(sundanese)
312 BOOST_SPIRIT_SCRIPT(syloti_nagri)
313 BOOST_SPIRIT_SCRIPT(syriac)
314 BOOST_SPIRIT_SCRIPT(tagbanwa)
315 BOOST_SPIRIT_SCRIPT(tai_le)
316 BOOST_SPIRIT_SCRIPT(new_tai_lue)
317 BOOST_SPIRIT_SCRIPT(tamil)
318 BOOST_SPIRIT_SCRIPT(tai_viet)
319 BOOST_SPIRIT_SCRIPT(telugu)
320 BOOST_SPIRIT_SCRIPT(tifinagh)
321 BOOST_SPIRIT_SCRIPT(tagalog)
322 BOOST_SPIRIT_SCRIPT(thaana)
323 BOOST_SPIRIT_SCRIPT(thai)
324 BOOST_SPIRIT_SCRIPT(tibetan)
325 BOOST_SPIRIT_SCRIPT(ugaritic)
326 BOOST_SPIRIT_SCRIPT(vai)
327 BOOST_SPIRIT_SCRIPT(old_persian)
328 BOOST_SPIRIT_SCRIPT(cuneiform)
329 BOOST_SPIRIT_SCRIPT(yi)
330 BOOST_SPIRIT_SCRIPT(inherited)
331 BOOST_SPIRIT_SCRIPT(common)
332 BOOST_SPIRIT_SCRIPT(unknown)
// The four generator macros were only needed to stamp out the predicates
// above; undefine them so they are no longer defined past this point.
334 #undef BOOST_SPIRIT_MAJOR_CATEGORY
335 #undef BOOST_SPIRIT_CATEGORY
336 #undef BOOST_SPIRIT_DERIVED_CATEGORY
337 #undef BOOST_SPIRIT_SCRIPT