1 /*=============================================================================
2 Copyright (c) 2001-2011 Joel de Guzman
4 Distributed under the Boost Software License, Version 1.0. (See accompanying
5 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 Autogenerated by MultiStageTable.py (Unicode multi-stage
8 table builder) (c) Peter Kankowski, 2008
9 ==============================================================================*/
10 #if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010)
11 #define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010
13 #include <boost/cstdint.hpp>
15 # include "category_table.hpp"
16 # include "script_table.hpp"
17 # include "lowercase_table.hpp"
18 # include "uppercase_table.hpp"
20 namespace boost { namespace spirit { namespace ucd
22 // This header provides Basic (Level 1) Unicode Support
23 // See http://unicode.org/reports/tr18/ for details
27 // bit pattern: xxMMMCCC
28 // MMM: major_category
44 uppercase_letter = 0, // [Lu] an uppercase letter
45 lowercase_letter, // [Ll] a lowercase letter
46 titlecase_letter, // [Lt] a digraphic character, with first part uppercase
47 modifier_letter, // [Lm] a modifier letter
48 other_letter, // [Lo] other letters, including syllables and ideographs
50 nonspacing_mark = 8, // [Mn] a nonspacing combining mark (zero advance width)
51 enclosing_mark, // [Me] an enclosing combining mark
52 spacing_mark, // [Mc] a spacing combining mark (positive advance width)
54 decimal_number = 16, // [Nd] a decimal digit
55 letter_number, // [Nl] a letterlike numeric character
56 other_number, // [No] a numeric character of other type
58 space_separator = 24, // [Zs] a space character (of various non-zero widths)
59 line_separator, // [Zl] U+2028 LINE SEPARATOR only
60 paragraph_separator, // [Zp] U+2029 PARAGRAPH SEPARATOR only
62 control = 32, // [Cc] a C0 or C1 control code
63 format, // [Cf] a format control character
64 private_use, // [Co] a private-use character
65 surrogate, // [Cs] a surrogate code point
66 unassigned, // [Cn] a reserved unassigned code point or a noncharacter
68 dash_punctuation = 40, // [Pd] a dash or hyphen punctuation mark
69 open_punctuation, // [Ps] an opening punctuation mark (of a pair)
70 close_punctuation, // [Pe] a closing punctuation mark (of a pair)
71 connector_punctuation, // [Pc] a connecting punctuation mark, like a tie
72 other_punctuation, // [Po] a punctuation mark of other type
73 initial_punctuation, // [Pi] an initial quotation mark
74 final_punctuation, // [Pf] a final quotation mark
76 math_symbol = 48, // [Sm] a symbol of primarily mathematical use
77 currency_symbol, // [Sc] a currency sign
78 modifier_symbol, // [Sk] a non-letterlike modifier symbol
79 other_symbol // [So] a symbol of other type
82 enum derived_properties
89 noncharacter_code_point = 2048,
90 default_ignorable_code_point = 4096
106 canadian_aboriginal = 11,
115 egyptian_hieroglyphs = 20,
128 katakana_or_hiragana = 33,
157 inscriptional_pahlavi = 62,
159 inscriptional_parthian = 64,
163 old_south_arabian = 68,
192 inline properties::category get_category(::boost::uint32_t ch)
194 return static_cast<properties::category>(detail::category_lookup(ch) & 0x3F);
197 inline properties::major_category get_major_category(::boost::uint32_t ch)
199 return static_cast<properties::major_category>(get_category(ch) >> 3);
202 inline bool is_punctuation(::boost::uint32_t ch)
204 return get_major_category(ch) == properties::punctuation;
207 inline bool is_decimal_number(::boost::uint32_t ch)
209 return get_category(ch) == properties::decimal_number;
212 inline bool is_hex_digit(::boost::uint32_t ch)
214 return (detail::category_lookup(ch) & properties::hex_digit) != 0;
217 inline bool is_control(::boost::uint32_t ch)
219 return get_category(ch) == properties::control;
222 inline bool is_alphabetic(::boost::uint32_t ch)
224 return (detail::category_lookup(ch) & properties::alphabetic) != 0;
227 inline bool is_alphanumeric(::boost::uint32_t ch)
229 return is_decimal_number(ch) || is_alphabetic(ch);
232 inline bool is_uppercase(::boost::uint32_t ch)
234 return (detail::category_lookup(ch) & properties::uppercase) != 0;
237 inline bool is_lowercase(::boost::uint32_t ch)
239 return (detail::category_lookup(ch) & properties::lowercase) != 0;
242 inline bool is_white_space(::boost::uint32_t ch)
244 return (detail::category_lookup(ch) & properties::white_space) != 0;
247 inline bool is_blank(::boost::uint32_t ch)
251 case '\n': case '\v': case '\f': case '\r':
254 return is_white_space(ch)
255 && !( get_category(ch) == properties::line_separator
256 || get_category(ch) == properties::paragraph_separator
261 inline bool is_graph(::boost::uint32_t ch)
263 return !( is_white_space(ch)
264 || get_category(ch) == properties::control
265 || get_category(ch) == properties::surrogate
266 || get_category(ch) == properties::unassigned
270 inline bool is_print(::boost::uint32_t ch)
272 return (is_graph(ch) || is_blank(ch)) && !is_control(ch);
275 inline bool is_noncharacter_code_point(::boost::uint32_t ch)
277 return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0;
280 inline bool is_default_ignorable_code_point(::boost::uint32_t ch)
282 return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0;
285 inline properties::script get_script(::boost::uint32_t ch)
287 return static_cast<properties::script>(detail::script_lookup(ch) & 0x7F);
290 inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch)
292 // The table returns 0 to signal that this code maps to itself
293 ::boost::uint32_t r = detail::lowercase_lookup(ch);
294 return (r == 0)? ch : r;
297 inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch)
299 // The table returns 0 to signal that this code maps to itself
300 ::boost::uint32_t r = detail::uppercase_lookup(ch);
301 return (r == 0)? ch : r;