]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // |
2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0. (See | |
5 | // accompanying file LICENSE_1_0.txt or copy at | |
6 | // http://www.boost.org/LICENSE_1_0.txt) | |
7 | // | |
8 | #ifndef BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED | |
9 | #define BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED | |
10 | ||
11 | #include <boost/locale/config.hpp> | |
12 | #include <boost/cstdint.hpp> | |
13 | #include <boost/assert.hpp> | |
14 | #ifdef BOOST_MSVC | |
15 | # pragma warning(push) | |
16 | # pragma warning(disable : 4275 4251 4231 4660) | |
17 | #endif | |
18 | ||
19 | ||
20 | namespace boost { | |
21 | ||
22 | namespace locale { | |
23 | ||
24 | /// | |
25 | /// \brief This namespace contains all operations required for boundary analysis of text | |
26 | /// | |
27 | namespace boundary { | |
28 | /// | |
29 | /// \defgroup boundary Boundary Analysis | |
30 | /// | |
31 | /// This module contains all operations required for boundary analysis of text: character, word, line and sentence boundaries | |
32 | /// | |
33 | /// @{ | |
34 | /// | |
35 | ||
36 | /// | |
37 | /// This type describes the possible boundary analysis alternatives. | |
38 | /// | |
39 | enum boundary_type { | |
40 | character, ///< Analyse the text for character boundaries | |
41 | word, ///< Analyse the text for word boundaries | |
42 | sentence, ///< Analyse the text for sentence boundaries | |
43 | line ///< Analyse the text for positions suitable for line breaks | |
44 | }; | |
45 | ||
46 | /// | |
47 | /// \brief Flags used with boundary analysis -- the type of the word, line or sentence boundary found. | |
48 | /// | |
49 | /// It is a bit-mask that represents the combination of rules used to select a specific boundary. | |
50 | /// | |
51 | typedef uint32_t rule_type; | |
52 | ||
53 | /// | |
54 | /// \anchor bl_boundary_word_rules | |
55 | /// \name Flags that describe a type of word selected | |
56 | /// @{ | |
57 | static const rule_type | |
58 | word_none = 0x0000F, ///< Not a word, like white space or punctuation mark | |
59 | word_number = 0x000F0, ///< Word that appears to be a number | |
60 | word_letter = 0x00F00, ///< Word that contains letters, excluding kana and ideographic characters | |
61 | word_kana = 0x0F000, ///< Word that contains kana characters | |
62 | word_ideo = 0xF0000, ///< Word that contains ideographic characters | |
63 | word_any = 0xFFFF0, ///< Any word including numbers; 0 is a special flag, equivalent to 15 | |
64 | word_letters = 0xFFF00, ///< Any word, excluding numbers but including letters, kana and ideograms. | |
65 | word_kana_ideo = 0xFF000, ///< Word that includes kana or ideographic characters | |
66 | word_mask = 0xFFFFF; ///< Full word mask - select all possible variants | |
67 | /// @} | |
68 | ||
69 | /// | |
70 | /// \anchor bl_boundary_line_rules | |
71 | /// \name Flags that describe a type of line break | |
72 | /// @{ | |
73 | static const rule_type | |
74 | line_soft = 0x0F, ///< Soft line break: optional but not required | |
75 | line_hard = 0xF0, ///< Hard line break: line break is required (as per CR/LF) | |
76 | line_any = 0xFF, ///< Soft or Hard line break | |
77 | line_mask = 0xFF; ///< Select all types of line breaks | |
78 | ||
79 | /// @} | |
80 | ||
81 | /// | |
82 | /// \anchor bl_boundary_sentence_rules | |
83 | /// \name Flags that describe a type of sentence break | |
84 | /// | |
85 | /// @{ | |
86 | static const rule_type | |
87 | sentence_term = 0x0F, ///< \brief The sentence was terminated with a sentence terminator | |
88 | /// like ".", "!" possibly followed by a hard separator like CR, LF, PS | |
89 | sentence_sep = 0xF0, ///< \brief The sentence does not contain a terminator like ".", "!" but ended with a hard separator | |
90 | /// like CR, LF, PS or end of input. | |
91 | sentence_any = 0xFF, ///< Either the first or the second sentence break type. | |
92 | sentence_mask = 0xFF; ///< Select all sentence breaking points | |
93 | ||
94 | ///@} | |
95 | ||
96 | /// | |
97 | /// \name Flags that describe a type of character break. | |
98 | /// | |
99 | /// At this point break iterator does not distinguish different | |
100 | /// kinds of characters so it is used for consistency. | |
101 | ///@{ | |
102 | static const rule_type | |
103 | character_any = 0xF, ///< Not in use; provided only for consistency with the other boundary types | |
104 | character_mask = 0xF; ///< Select all character breaking points | |
105 | ||
106 | ///@} | |
107 | ||
108 | /// | |
109 | /// \brief Returns the mask that covers all rule variants for the boundary type \a t; returns 0 if \a t matches none of the boundary_type enumerators | |
110 | /// | |
111 | inline rule_type boundary_rule(boundary_type t) | |
112 | { | |
113 | switch(t) { | |
114 | case character: return character_mask; | |
115 | case word: return word_mask; | |
116 | case sentence: return sentence_mask; | |
117 | case line: return line_mask; | |
118 | default: return 0; | |
119 | } | |
120 | } | |
121 | ||
122 | /// | |
123 | ///@} | |
124 | /// | |
125 | ||
126 | } // boundary | |
127 | } // locale | |
128 | } // boost | |
129 | ||
130 | ||
131 | #ifdef BOOST_MSVC | |
132 | #pragma warning(pop) | |
133 | #endif | |
134 | ||
135 | #endif | |
136 | // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 |