]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*============================================================================= |
2 | Copyright (c) 2001-2011 Joel de Guzman | |
3 | ||
4 | Distributed under the Boost Software License, Version 1.0. (See accompanying | |
5 | file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
6 | ||
7 | Autogenerated by MultiStageTable.py (Unicode multi-stage | |
8 | table builder) (c) Peter Kankowski, 2008 | |
9 | ==============================================================================*/ | |
10 | #if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010) | |
11 | #define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010 | |
12 | ||
13 | #include <boost/cstdint.hpp> | |
14 | ||
15 | # include "category_table.hpp" | |
16 | # include "script_table.hpp" | |
17 | # include "lowercase_table.hpp" | |
18 | # include "uppercase_table.hpp" | |
19 | ||
20 | namespace boost { namespace spirit { namespace ucd | |
21 | { | |
22 | // This header provides Basic (Level 1) Unicode Support | |
23 | // See http://unicode.org/reports/tr18/ for details | |
24 | ||
// Enumerations describing the Unicode properties of a code point.
//
// A general-category value packs two fields into six bits (xxMMMCCC):
//     MMM  the major_category (letter, mark, number, ...)
//     CCC  the index of the category inside its major category
// so that  category == (major_category << 3) | index.
// The derived_properties flags are single bits starting at bit 6, i.e.
// directly above the six category bits.
struct properties
{
    // Coarse grouping of the general categories (the MMM field).
    enum major_category
    {
        letter      = 0,
        mark        = 1,
        number      = 2,
        separator   = 3,
        other       = 4,
        punctuation = 5,
        symbol      = 6
    };

    // Unicode general category; the two-letter abbreviation is given in
    // brackets. Each group starts at (major_category << 3).
    enum category
    {
        // letter (0 << 3)
        uppercase_letter      = 0,  // [Lu] an uppercase letter
        lowercase_letter      = 1,  // [Ll] a lowercase letter
        titlecase_letter      = 2,  // [Lt] a digraphic character, with first part uppercase
        modifier_letter       = 3,  // [Lm] a modifier letter
        other_letter          = 4,  // [Lo] other letters, including syllables and ideographs

        // mark (1 << 3)
        nonspacing_mark       = 8,  // [Mn] a nonspacing combining mark (zero advance width)
        enclosing_mark        = 9,  // [Me] an enclosing combining mark
        spacing_mark          = 10, // [Mc] a spacing combining mark (positive advance width)

        // number (2 << 3)
        decimal_number        = 16, // [Nd] a decimal digit
        letter_number         = 17, // [Nl] a letterlike numeric character
        other_number          = 18, // [No] a numeric character of other type

        // separator (3 << 3)
        space_separator       = 24, // [Zs] a space character (of various non-zero widths)
        line_separator        = 25, // [Zl] U+2028 LINE SEPARATOR only
        paragraph_separator   = 26, // [Zp] U+2029 PARAGRAPH SEPARATOR only

        // other (4 << 3)
        control               = 32, // [Cc] a C0 or C1 control code
        format                = 33, // [Cf] a format control character
        private_use           = 34, // [Co] a private-use character
        surrogate             = 35, // [Cs] a surrogate code point
        unassigned            = 36, // [Cn] a reserved unassigned code point or a noncharacter

        // punctuation (5 << 3)
        dash_punctuation      = 40, // [Pd] a dash or hyphen punctuation mark
        open_punctuation      = 41, // [Ps] an opening punctuation mark (of a pair)
        close_punctuation     = 42, // [Pe] a closing punctuation mark (of a pair)
        connector_punctuation = 43, // [Pc] a connecting punctuation mark, like a tie
        other_punctuation     = 44, // [Po] a punctuation mark of other type
        initial_punctuation   = 45, // [Pi] an initial quotation mark
        final_punctuation     = 46, // [Pf] a final quotation mark

        // symbol (6 << 3)
        math_symbol           = 48, // [Sm] a symbol of primarily mathematical use
        currency_symbol       = 49, // [Sc] a currency sign
        modifier_symbol       = 50, // [Sk] a non-letterlike modifier symbol
        other_symbol          = 51  // [So] a symbol of other type
    };

    // Single-bit flags for derived (boolean) properties.
    enum derived_properties
    {
        alphabetic                   = 1 << 6,  // 64
        uppercase                    = 1 << 7,  // 128
        lowercase                    = 1 << 8,  // 256
        white_space                  = 1 << 9,  // 512
        hex_digit                    = 1 << 10, // 1024
        noncharacter_code_point      = 1 << 11, // 2048
        default_ignorable_code_point = 1 << 12  // 4096
    };

    // Script identifiers. The numeric values are spelled out on purpose:
    // get_script() casts a table value straight to this enum, so the
    // numbers must not be reordered or renumbered by hand.
    enum script
    {
        arabic                 = 0,
        imperial_aramaic       = 1,
        armenian               = 2,
        avestan                = 3,
        balinese               = 4,
        bamum                  = 5,
        bengali                = 6,
        bopomofo               = 7,
        braille                = 8,
        buginese               = 9,
        buhid                  = 10,
        canadian_aboriginal    = 11,
        carian                 = 12,
        cham                   = 13,
        cherokee               = 14,
        coptic                 = 15,
        cypriot                = 16,
        cyrillic               = 17,
        devanagari             = 18,
        deseret                = 19,
        egyptian_hieroglyphs   = 20,
        ethiopic               = 21,
        georgian               = 22,
        glagolitic             = 23,
        gothic                 = 24,
        greek                  = 25,
        gujarati               = 26,
        gurmukhi               = 27,
        hangul                 = 28,
        han                    = 29,
        hanunoo                = 30,
        hebrew                 = 31,
        hiragana               = 32,
        katakana_or_hiragana   = 33,
        old_italic             = 34,
        javanese               = 35,
        kayah_li               = 36,
        katakana               = 37,
        kharoshthi             = 38,
        khmer                  = 39,
        kannada                = 40,
        kaithi                 = 41,
        tai_tham               = 42,
        lao                    = 43,
        latin                  = 44,
        lepcha                 = 45,
        limbu                  = 46,
        linear_b               = 47,
        lisu                   = 48,
        lycian                 = 49,
        lydian                 = 50,
        malayalam              = 51,
        mongolian              = 52,
        meetei_mayek           = 53,
        myanmar                = 54,
        nko                    = 55,
        ogham                  = 56,
        ol_chiki               = 57,
        old_turkic             = 58,
        oriya                  = 59,
        osmanya                = 60,
        phags_pa               = 61,
        inscriptional_pahlavi  = 62,
        phoenician             = 63,
        inscriptional_parthian = 64,
        rejang                 = 65,
        runic                  = 66,
        samaritan              = 67,
        old_south_arabian      = 68,
        saurashtra             = 69,
        shavian                = 70,
        sinhala                = 71,
        sundanese              = 72,
        syloti_nagri           = 73,
        syriac                 = 74,
        tagbanwa               = 75,
        tai_le                 = 76,
        new_tai_lue            = 77,
        tamil                  = 78,
        tai_viet               = 79,
        telugu                 = 80,
        tifinagh               = 81,
        tagalog                = 82,
        thaana                 = 83,
        thai                   = 84,
        tibetan                = 85,
        ugaritic               = 86,
        vai                    = 87,
        old_persian            = 88,
        cuneiform              = 89,
        yi                     = 90,
        inherited              = 91,
        common                 = 92,
        unknown                = 93
    };
};
191 | ||
192 | inline properties::category get_category(::boost::uint32_t ch) | |
193 | { | |
194 | return static_cast<properties::category>(detail::category_lookup(ch) & 0x3F); | |
195 | } | |
196 | ||
197 | inline properties::major_category get_major_category(::boost::uint32_t ch) | |
198 | { | |
199 | return static_cast<properties::major_category>(get_category(ch) >> 3); | |
200 | } | |
201 | ||
202 | inline bool is_punctuation(::boost::uint32_t ch) | |
203 | { | |
204 | return get_major_category(ch) == properties::punctuation; | |
205 | } | |
206 | ||
207 | inline bool is_decimal_number(::boost::uint32_t ch) | |
208 | { | |
209 | return get_category(ch) == properties::decimal_number; | |
210 | } | |
211 | ||
212 | inline bool is_hex_digit(::boost::uint32_t ch) | |
213 | { | |
214 | return (detail::category_lookup(ch) & properties::hex_digit) != 0; | |
215 | } | |
216 | ||
217 | inline bool is_control(::boost::uint32_t ch) | |
218 | { | |
219 | return get_category(ch) == properties::control; | |
220 | } | |
221 | ||
222 | inline bool is_alphabetic(::boost::uint32_t ch) | |
223 | { | |
224 | return (detail::category_lookup(ch) & properties::alphabetic) != 0; | |
225 | } | |
226 | ||
227 | inline bool is_alphanumeric(::boost::uint32_t ch) | |
228 | { | |
229 | return is_decimal_number(ch) || is_alphabetic(ch); | |
230 | } | |
231 | ||
232 | inline bool is_uppercase(::boost::uint32_t ch) | |
233 | { | |
234 | return (detail::category_lookup(ch) & properties::uppercase) != 0; | |
235 | } | |
236 | ||
237 | inline bool is_lowercase(::boost::uint32_t ch) | |
238 | { | |
239 | return (detail::category_lookup(ch) & properties::lowercase) != 0; | |
240 | } | |
241 | ||
242 | inline bool is_white_space(::boost::uint32_t ch) | |
243 | { | |
244 | return (detail::category_lookup(ch) & properties::white_space) != 0; | |
245 | } | |
246 | ||
247 | inline bool is_blank(::boost::uint32_t ch) | |
248 | { | |
249 | switch (ch) | |
250 | { | |
251 | case '\n': case '\v': case '\f': case '\r': | |
252 | return false; | |
253 | default: | |
254 | return is_white_space(ch) | |
255 | && !( get_category(ch) == properties::line_separator | |
256 | || get_category(ch) == properties::paragraph_separator | |
257 | ); | |
258 | } | |
259 | } | |
260 | ||
261 | inline bool is_graph(::boost::uint32_t ch) | |
262 | { | |
263 | return !( is_white_space(ch) | |
264 | || get_category(ch) == properties::control | |
265 | || get_category(ch) == properties::surrogate | |
266 | || get_category(ch) == properties::unassigned | |
267 | ); | |
268 | } | |
269 | ||
270 | inline bool is_print(::boost::uint32_t ch) | |
271 | { | |
272 | return (is_graph(ch) || is_blank(ch)) && !is_control(ch); | |
273 | } | |
274 | ||
275 | inline bool is_noncharacter_code_point(::boost::uint32_t ch) | |
276 | { | |
277 | return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0; | |
278 | } | |
279 | ||
280 | inline bool is_default_ignorable_code_point(::boost::uint32_t ch) | |
281 | { | |
282 | return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0; | |
283 | } | |
284 | ||
285 | inline properties::script get_script(::boost::uint32_t ch) | |
286 | { | |
287 | return static_cast<properties::script>(detail::script_lookup(ch) & 0x7F); | |
288 | } | |
289 | ||
290 | inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch) | |
291 | { | |
292 | // The table returns 0 to signal that this code maps to itself | |
293 | ::boost::uint32_t r = detail::lowercase_lookup(ch); | |
294 | return (r == 0)? ch : r; | |
295 | } | |
296 | ||
297 | inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch) | |
298 | { | |
299 | // The table returns 0 to signal that this code maps to itself | |
300 | ::boost::uint32_t r = detail::uppercase_lookup(ch); | |
301 | return (r == 0)? ch : r; | |
302 | } | |
303 | }}} | |
304 | ||
305 | #endif |