]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/spirit/include/boost/spirit/home/support/char_encoding/unicode/query.hpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / boost / libs / spirit / include / boost / spirit / home / support / char_encoding / unicode / query.hpp
1 /*=============================================================================
2 Copyright (c) 2001-2011 Joel de Guzman
3
4 Distributed under the Boost Software License, Version 1.0. (See accompanying
5 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7 Autogenerated by MultiStageTable.py (Unicode multi-stage
8 table builder) (c) Peter Kankowski, 2008
9 ==============================================================================*/
10 #if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010)
11 #define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010
12
13 #include <boost/cstdint.hpp>
14
15 # include "category_table.hpp"
16 # include "script_table.hpp"
17 # include "lowercase_table.hpp"
18 # include "uppercase_table.hpp"
19
20 namespace boost { namespace spirit { namespace ucd
21 {
22 // This header provides Basic (Level 1) Unicode Support
23 // See http://unicode.org/reports/tr18/ for details
24
// Container for the enumerations that describe the Unicode properties
// encoded in the lookup tables used by the query functions of this header.
struct properties
{
    // Layout of the low bits of a category table entry: xxMMMCCC
    //   MMM: major_category
    //   CCC: category (index within the major category)
    // A `category` value is therefore (major_category << 3) | index.

    enum major_category
    {
        letter      = 0,
        mark        = 1,
        number      = 2,
        separator   = 3,
        other       = 4,
        punctuation = 5,
        symbol      = 6
    };

    enum category
    {
        // Letters (major category 0)
        uppercase_letter      = 0,  // [Lu] an uppercase letter
        lowercase_letter      = 1,  // [Ll] a lowercase letter
        titlecase_letter      = 2,  // [Lt] a digraphic character, with first part uppercase
        modifier_letter       = 3,  // [Lm] a modifier letter
        other_letter          = 4,  // [Lo] other letters, including syllables and ideographs

        // Marks (major category 1)
        nonspacing_mark       = 8,  // [Mn] a nonspacing combining mark (zero advance width)
        enclosing_mark        = 9,  // [Me] an enclosing combining mark
        spacing_mark          = 10, // [Mc] a spacing combining mark (positive advance width)

        // Numbers (major category 2)
        decimal_number        = 16, // [Nd] a decimal digit
        letter_number         = 17, // [Nl] a letterlike numeric character
        other_number          = 18, // [No] a numeric character of other type

        // Separators (major category 3)
        space_separator       = 24, // [Zs] a space character (of various non-zero widths)
        line_separator        = 25, // [Zl] U+2028 LINE SEPARATOR only
        paragraph_separator   = 26, // [Zp] U+2029 PARAGRAPH SEPARATOR only

        // Other (major category 4)
        control               = 32, // [Cc] a C0 or C1 control code
        format                = 33, // [Cf] a format control character
        private_use           = 34, // [Co] a private-use character
        surrogate             = 35, // [Cs] a surrogate code point
        unassigned            = 36, // [Cn] a reserved unassigned code point or a noncharacter

        // Punctuation (major category 5)
        dash_punctuation      = 40, // [Pd] a dash or hyphen punctuation mark
        open_punctuation      = 41, // [Ps] an opening punctuation mark (of a pair)
        close_punctuation     = 42, // [Pe] a closing punctuation mark (of a pair)
        connector_punctuation = 43, // [Pc] a connecting punctuation mark, like a tie
        other_punctuation     = 44, // [Po] a punctuation mark of other type
        initial_punctuation   = 45, // [Pi] an initial quotation mark
        final_punctuation     = 46, // [Pf] a final quotation mark

        // Symbols (major category 6)
        math_symbol           = 48, // [Sm] a symbol of primarily mathematical use
        currency_symbol       = 49, // [Sc] a currency sign
        modifier_symbol       = 50, // [Sk] a non-letterlike modifier symbol
        other_symbol          = 51  // [So] a symbol of other type
    };

    // Single-bit flags; each occupies a bit above the six category bits
    // and is tested with a bitwise AND against a category table entry.
    enum derived_properties
    {
        alphabetic                   = 0x0040,
        uppercase                    = 0x0080,
        lowercase                    = 0x0100,
        white_space                  = 0x0200,
        hex_digit                    = 0x0400,
        noncharacter_code_point      = 0x0800,
        default_ignorable_code_point = 0x1000
    };

    // Script identifiers as returned by get_script().
    enum script
    {
        arabic                 = 0,
        imperial_aramaic       = 1,
        armenian               = 2,
        avestan                = 3,
        balinese               = 4,
        bamum                  = 5,
        bengali                = 6,
        bopomofo               = 7,
        braille                = 8,
        buginese               = 9,
        buhid                  = 10,
        canadian_aboriginal    = 11,
        carian                 = 12,
        cham                   = 13,
        cherokee               = 14,
        coptic                 = 15,
        cypriot                = 16,
        cyrillic               = 17,
        devanagari             = 18,
        deseret                = 19,
        egyptian_hieroglyphs   = 20,
        ethiopic               = 21,
        georgian               = 22,
        glagolitic             = 23,
        gothic                 = 24,
        greek                  = 25,
        gujarati               = 26,
        gurmukhi               = 27,
        hangul                 = 28,
        han                    = 29,
        hanunoo                = 30,
        hebrew                 = 31,
        hiragana               = 32,
        katakana_or_hiragana   = 33,
        old_italic             = 34,
        javanese               = 35,
        kayah_li               = 36,
        katakana               = 37,
        kharoshthi             = 38,
        khmer                  = 39,
        kannada                = 40,
        kaithi                 = 41,
        tai_tham               = 42,
        lao                    = 43,
        latin                  = 44,
        lepcha                 = 45,
        limbu                  = 46,
        linear_b               = 47,
        lisu                   = 48,
        lycian                 = 49,
        lydian                 = 50,
        malayalam              = 51,
        mongolian              = 52,
        meetei_mayek           = 53,
        myanmar                = 54,
        nko                    = 55,
        ogham                  = 56,
        ol_chiki               = 57,
        old_turkic             = 58,
        oriya                  = 59,
        osmanya                = 60,
        phags_pa               = 61,
        inscriptional_pahlavi  = 62,
        phoenician             = 63,
        inscriptional_parthian = 64,
        rejang                 = 65,
        runic                  = 66,
        samaritan              = 67,
        old_south_arabian      = 68,
        saurashtra             = 69,
        shavian                = 70,
        sinhala                = 71,
        sundanese              = 72,
        syloti_nagri           = 73,
        syriac                 = 74,
        tagbanwa               = 75,
        tai_le                 = 76,
        new_tai_lue            = 77,
        tamil                  = 78,
        tai_viet               = 79,
        telugu                 = 80,
        tifinagh               = 81,
        tagalog                = 82,
        thaana                 = 83,
        thai                   = 84,
        tibetan                = 85,
        ugaritic               = 86,
        vai                    = 87,
        old_persian            = 88,
        cuneiform              = 89,
        yi                     = 90,
        inherited              = 91,
        common                 = 92,
        unknown                = 93
    };
};
191
192 inline properties::category get_category(::boost::uint32_t ch)
193 {
194 return static_cast<properties::category>(detail::category_lookup(ch) & 0x3F);
195 }
196
197 inline properties::major_category get_major_category(::boost::uint32_t ch)
198 {
199 return static_cast<properties::major_category>(get_category(ch) >> 3);
200 }
201
202 inline bool is_punctuation(::boost::uint32_t ch)
203 {
204 return get_major_category(ch) == properties::punctuation;
205 }
206
207 inline bool is_decimal_number(::boost::uint32_t ch)
208 {
209 return get_category(ch) == properties::decimal_number;
210 }
211
212 inline bool is_hex_digit(::boost::uint32_t ch)
213 {
214 return (detail::category_lookup(ch) & properties::hex_digit) != 0;
215 }
216
217 inline bool is_control(::boost::uint32_t ch)
218 {
219 return get_category(ch) == properties::control;
220 }
221
222 inline bool is_alphabetic(::boost::uint32_t ch)
223 {
224 return (detail::category_lookup(ch) & properties::alphabetic) != 0;
225 }
226
227 inline bool is_alphanumeric(::boost::uint32_t ch)
228 {
229 return is_decimal_number(ch) || is_alphabetic(ch);
230 }
231
232 inline bool is_uppercase(::boost::uint32_t ch)
233 {
234 return (detail::category_lookup(ch) & properties::uppercase) != 0;
235 }
236
237 inline bool is_lowercase(::boost::uint32_t ch)
238 {
239 return (detail::category_lookup(ch) & properties::lowercase) != 0;
240 }
241
242 inline bool is_white_space(::boost::uint32_t ch)
243 {
244 return (detail::category_lookup(ch) & properties::white_space) != 0;
245 }
246
247 inline bool is_blank(::boost::uint32_t ch)
248 {
249 switch (ch)
250 {
251 case '\n': case '\v': case '\f': case '\r':
252 return false;
253 default:
254 return is_white_space(ch)
255 && !( get_category(ch) == properties::line_separator
256 || get_category(ch) == properties::paragraph_separator
257 );
258 }
259 }
260
261 inline bool is_graph(::boost::uint32_t ch)
262 {
263 return !( is_white_space(ch)
264 || get_category(ch) == properties::control
265 || get_category(ch) == properties::surrogate
266 || get_category(ch) == properties::unassigned
267 );
268 }
269
270 inline bool is_print(::boost::uint32_t ch)
271 {
272 return (is_graph(ch) || is_blank(ch)) && !is_control(ch);
273 }
274
275 inline bool is_noncharacter_code_point(::boost::uint32_t ch)
276 {
277 return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0;
278 }
279
280 inline bool is_default_ignorable_code_point(::boost::uint32_t ch)
281 {
282 return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0;
283 }
284
285 inline properties::script get_script(::boost::uint32_t ch)
286 {
287 return static_cast<properties::script>(detail::script_lookup(ch) & 0x7F);
288 }
289
290 inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch)
291 {
292 // The table returns 0 to signal that this code maps to itself
293 ::boost::uint32_t r = detail::lowercase_lookup(ch);
294 return (r == 0)? ch : r;
295 }
296
297 inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch)
298 {
299 // The table returns 0 to signal that this code maps to itself
300 ::boost::uint32_t r = detail::uppercase_lookup(ch);
301 return (r == 0)? ch : r;
302 }
303 }}}
304
305 #endif