]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // |
2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0. (See | |
5 | // accompanying file LICENSE_1_0.txt or copy at | |
6 | // http://www.boost.org/LICENSE_1_0.txt) | |
7 | // | |
8 | #ifndef BOOST_LOCALE_CONVERTER_HPP_INCLUDED | |
9 | #define BOOST_LOCALE_CONVERTER_HPP_INCLUDED | |
10 | ||
11 | #include <boost/locale/config.hpp> | |
12 | #ifdef BOOST_MSVC | |
13 | # pragma warning(push) | |
14 | # pragma warning(disable : 4275 4251 4231 4660) | |
15 | #endif | |
16 | #include <locale> | |
17 | ||
18 | ||
19 | namespace boost { | |
20 | namespace locale { | |
21 | ||
22 | /// | |
23 | /// \defgroup convert Text Conversions | |
24 | /// | |
25 | /// This module provides various functions for string manipulation, such as Unicode normalization, case conversion, etc. | |
26 | /// @{ | |
27 | /// | |
28 | ||
29 | ||
30 | /// | |
31 | /// \brief This class provides the base flags for text manipulation. It is used as the base class of the converter facet. | |
32 | /// | |
33 | class converter_base { | |
34 | public: | |
35 | /// | |
36 | /// The flag passed to the facet to select the type of operation to perform | |
37 | /// | |
38 | typedef enum { | |
39 | normalization, ///< Apply Unicode normalization on the text | |
40 | upper_case, ///< Convert text to upper case | |
41 | lower_case, ///< Convert text to lower case | |
42 | case_folding, ///< Fold case in the text | |
43 | title_case ///< Convert text to title case | |
44 | } conversion_type; | |
45 | }; | |
46 | ||
47 | template<typename CharType> /* Primary template: only declared here; it is defined solely under BOOST_LOCALE_DOXYGEN, otherwise the explicit specializations that follow are used */ | |
48 | class converter; | |
49 | ||
50 | #ifdef BOOST_LOCALE_DOXYGEN | |
51 | /// | |
52 | /// \brief The facet that implements text manipulation | |
53 | /// | |
54 | /// It is used to perform the text conversion operations defined by \ref conversion_type. It is specialized | |
55 | /// for four character types: \c char, \c wchar_t, \c char16_t and \c char32_t (the last two only when BOOST_LOCALE_ENABLE_CHAR16_T / BOOST_LOCALE_ENABLE_CHAR32_T are defined) | |
56 | /// | |
57 | template<typename Char> | |
58 | class BOOST_LOCALE_DECL converter: public converter_base, public std::locale::facet { | |
59 | public: | |
60 | /// Locale facet identification | |
61 | static std::locale::id id; | |
62 | ||
63 | /// Standard constructor; \a refs is forwarded to std::locale::facet | |
64 | converter(size_t refs = 0) : std::locale::facet(refs) | |
65 | { | |
66 | } | |
67 | /// | |
68 | /// Convert the text in range [\a begin, \a end) according to conversion method \a how. The parameter | |
69 | /// \a flags is used to specify the normalization method, like nfd, nfc, etc. | |
70 | /// | |
71 | virtual std::basic_string<Char> convert(conversion_type how,Char const *begin,Char const *end,int flags = 0) const = 0; | |
72 | #if defined (__SUNPRO_CC) && defined (_RWSTD_VER) | |
73 | std::locale::id& __get_id (void) const { return id; } | |
74 | #endif | |
75 | }; | |
76 | #else | |
77 | ||
78 | template<> /* converter facet for char strings; abstract, since convert() is pure virtual */ | |
79 | class BOOST_LOCALE_DECL converter<char> : public converter_base, public std::locale::facet { | |
80 | public: | |
81 | static std::locale::id id; /* facet identification */ | |
82 | ||
83 | converter(size_t refs = 0) : std::locale::facet(refs) /* refs is forwarded to std::locale::facet */ | |
84 | { | |
85 | } | |
86 | virtual std::string convert(conversion_type how,char const *begin,char const *end,int flags = 0) const = 0; /* apply operation `how` to [begin, end); normalize() passes its norm_type as flags */ | |
87 | #if defined (__SUNPRO_CC) && defined (_RWSTD_VER) /* only when both the Sun C++ compiler and Rogue Wave stdlib macros are defined */ | |
88 | std::locale::id& __get_id (void) const { return id; } /* NOTE(review): presumably a hook that standard library needs to locate the facet id - confirm */ | |
89 | #endif | |
90 | }; | |
91 | ||
92 | template<> /* converter facet for wchar_t strings; abstract, since convert() is pure virtual */ | |
93 | class BOOST_LOCALE_DECL converter<wchar_t> : public converter_base, public std::locale::facet { | |
94 | public: | |
95 | static std::locale::id id; /* facet identification */ | |
96 | converter(size_t refs = 0) : std::locale::facet(refs) /* refs is forwarded to std::locale::facet */ | |
97 | { | |
98 | } | |
99 | virtual std::wstring convert(conversion_type how,wchar_t const *begin,wchar_t const *end,int flags = 0) const = 0; /* apply operation `how` to [begin, end); normalize() passes its norm_type as flags */ | |
100 | #if defined (__SUNPRO_CC) && defined (_RWSTD_VER) /* only when both the Sun C++ compiler and Rogue Wave stdlib macros are defined */ | |
101 | std::locale::id& __get_id (void) const { return id; } /* NOTE(review): presumably a hook that standard library needs to locate the facet id - confirm */ | |
102 | #endif | |
103 | }; | |
104 | ||
105 | #ifdef BOOST_LOCALE_ENABLE_CHAR16_T | |
106 | template<> /* converter facet for char16_t strings; abstract, since convert() is pure virtual */ | |
107 | class BOOST_LOCALE_DECL converter<char16_t> : public converter_base, public std::locale::facet { | |
108 | public: | |
109 | static std::locale::id id; /* facet identification */ | |
110 | converter(size_t refs = 0) : std::locale::facet(refs) /* refs is forwarded to std::locale::facet */ | |
111 | { | |
112 | } | |
113 | virtual std::u16string convert(conversion_type how,char16_t const *begin,char16_t const *end,int flags = 0) const = 0; /* apply operation `how` to [begin, end); normalize() passes its norm_type as flags */ | |
114 | #if defined (__SUNPRO_CC) && defined (_RWSTD_VER) /* only when both the Sun C++ compiler and Rogue Wave stdlib macros are defined */ | |
115 | std::locale::id& __get_id (void) const { return id; } /* NOTE(review): presumably a hook that standard library needs to locate the facet id - confirm */ | |
116 | #endif | |
117 | }; | |
118 | #endif | |
119 | ||
120 | #ifdef BOOST_LOCALE_ENABLE_CHAR32_T | |
121 | template<> /* converter facet for char32_t strings; abstract, since convert() is pure virtual */ | |
122 | class BOOST_LOCALE_DECL converter<char32_t> : public converter_base, public std::locale::facet { | |
123 | public: | |
124 | static std::locale::id id; /* facet identification */ | |
125 | converter(size_t refs = 0) : std::locale::facet(refs) /* refs is forwarded to std::locale::facet */ | |
126 | { | |
127 | } | |
128 | virtual std::u32string convert(conversion_type how,char32_t const *begin,char32_t const *end,int flags = 0) const = 0; /* apply operation `how` to [begin, end); normalize() passes its norm_type as flags */ | |
129 | #if defined (__SUNPRO_CC) && defined (_RWSTD_VER) /* only when both the Sun C++ compiler and Rogue Wave stdlib macros are defined */ | |
130 | std::locale::id& __get_id (void) const { return id; } /* NOTE(review): presumably a hook that standard library needs to locate the facet id - confirm */ | |
131 | #endif | |
132 | }; | |
133 | #endif | |
134 | ||
135 | #endif | |
136 | ||
137 | /// | |
138 | /// The type that defines the <a href="http://unicode.org/reports/tr15/#Norm_Forms">normalization form</a> | |
139 | /// | |
140 | ||
141 | typedef enum { | |
142 | norm_nfd, ///< Canonical decomposition | |
143 | norm_nfc, ///< Canonical decomposition followed by canonical composition | |
144 | norm_nfkd, ///< Compatibility decomposition | |
145 | norm_nfkc, ///< Compatibility decomposition followed by canonical composition. | |
146 | norm_default = norm_nfc, ///< Default normalization - canonical decomposition followed by canonical composition | |
147 | } norm_type; | |
148 | ||
149 | /// | |
150 | /// Normalize the Unicode string \a str according to \ref norm_type "normalization form" \a n and return the result as a new string | |
151 | /// | |
152 | /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take | |
153 | /// into account the locale encoding, because Unicode decomposition and composition are meaningless outside | |
154 | /// of a Unicode character set. | |
155 | /// | |
156 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
157 | /// | |
158 | template<typename CharType> | |
159 | std::basic_string<CharType> normalize(std::basic_string<CharType> const &str,norm_type n=norm_default,std::locale const &loc=std::locale()) | |
160 | { | |
161 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,str.data(),str.data() + str.size(),n); | |
162 | } | |
163 | ||
164 | /// | |
165 | /// Normalize the NUL terminated Unicode string \a str according to \ref norm_type "normalization form" \a n; the end of the string is located by scanning for the terminating NUL | |
166 | /// | |
167 | /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take | |
168 | /// into account the locale encoding, because Unicode decomposition and composition are meaningless outside | |
169 | /// of a Unicode character set. | |
170 | /// | |
171 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
172 | /// | |
173 | template<typename CharType> | |
174 | std::basic_string<CharType> normalize(CharType const *str,norm_type n=norm_default,std::locale const &loc=std::locale()) | |
175 | { | |
176 | CharType const *end=str; | |
177 | while(*end) | |
178 | end++; | |
179 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,str,end,n); | |
180 | } | |
181 | ||
182 | /// | |
183 | /// Normalize the Unicode string in range [\a begin, \a end) according to \ref norm_type "normalization form" \a n | |
184 | /// | |
185 | /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take | |
186 | /// into account the locale encoding, because Unicode decomposition and composition are meaningless outside | |
187 | /// of a Unicode character set. | |
188 | /// | |
189 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
190 | /// | |
191 | template<typename CharType> | |
192 | std::basic_string<CharType> normalize( CharType const *begin, | |
193 | CharType const *end, | |
194 | norm_type n=norm_default, | |
195 | std::locale const &loc=std::locale()) | |
196 | { | |
197 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,begin,end,n); | |
198 | } | |
199 | ||
200 | /////////////////////////////////////////////////// | |
201 | ||
202 | /// | |
203 | /// Convert the string \a str to upper case according to locale \a loc and return the result as a new string | |
204 | /// | |
205 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
206 | /// | |
207 | ||
208 | template<typename CharType> | |
209 | std::basic_string<CharType> to_upper(std::basic_string<CharType> const &str,std::locale const &loc=std::locale()) | |
210 | { | |
211 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,str.data(),str.data()+str.size()); | |
212 | } | |
213 | ||
214 | /// | |
215 | /// Convert the NUL terminated string \a str to upper case according to locale \a loc; the end of the string is located by scanning for the terminating NUL | |
216 | /// | |
217 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
218 | /// | |
219 | template<typename CharType> | |
220 | std::basic_string<CharType> to_upper(CharType const *str,std::locale const &loc=std::locale()) | |
221 | { | |
222 | CharType const *end=str; | |
223 | while(*end) | |
224 | end++; | |
225 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,str,end); | |
226 | } | |
227 | ||
228 | /// | |
229 | /// Convert the string in range [\a begin, \a end) to upper case according to locale \a loc and return the result as a new string | |
230 | /// | |
231 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
232 | /// | |
233 | template<typename CharType> | |
234 | std::basic_string<CharType> to_upper(CharType const *begin,CharType const *end,std::locale const &loc=std::locale()) | |
235 | { | |
236 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,begin,end); | |
237 | } | |
238 | ||
239 | /////////////////////////////////////////////////// | |
240 | ||
241 | /// | |
242 | /// Convert the string \a str to lower case according to locale \a loc and return the result as a new string | |
243 | /// | |
244 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
245 | /// | |
246 | ||
247 | template<typename CharType> | |
248 | std::basic_string<CharType> to_lower(std::basic_string<CharType> const &str,std::locale const &loc=std::locale()) | |
249 | { | |
250 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,str.data(),str.data()+str.size()); | |
251 | } | |
252 | ||
253 | /// | |
254 | /// Convert the NUL terminated string \a str to lower case according to locale \a loc; the end of the string is located by scanning for the terminating NUL | |
255 | /// | |
256 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
257 | /// | |
258 | template<typename CharType> | |
259 | std::basic_string<CharType> to_lower(CharType const *str,std::locale const &loc=std::locale()) | |
260 | { | |
261 | CharType const *end=str; | |
262 | while(*end) | |
263 | end++; | |
264 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,str,end); | |
265 | } | |
266 | ||
267 | /// | |
268 | /// Convert the string in range [\a begin, \a end) to lower case according to locale \a loc and return the result as a new string | |
269 | /// | |
270 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
271 | /// | |
272 | template<typename CharType> | |
273 | std::basic_string<CharType> to_lower(CharType const *begin,CharType const *end,std::locale const &loc=std::locale()) | |
274 | { | |
275 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,begin,end); | |
276 | } | |
277 | /////////////////////////////////////////////////// | |
278 | ||
279 | /// | |
280 | /// Convert the string \a str to title case according to locale \a loc and return the result as a new string | |
281 | /// | |
282 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
283 | /// | |
284 | ||
285 | template<typename CharType> | |
286 | std::basic_string<CharType> to_title(std::basic_string<CharType> const &str,std::locale const &loc=std::locale()) | |
287 | { | |
288 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,str.data(),str.data()+str.size()); | |
289 | } | |
290 | ||
291 | /// | |
292 | /// Convert the NUL terminated string \a str to title case according to locale \a loc; the end of the string is located by scanning for the terminating NUL | |
293 | /// | |
294 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
295 | /// | |
296 | template<typename CharType> | |
297 | std::basic_string<CharType> to_title(CharType const *str,std::locale const &loc=std::locale()) | |
298 | { | |
299 | CharType const *end=str; | |
300 | while(*end) | |
301 | end++; | |
302 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,str,end); | |
303 | } | |
304 | ||
305 | /// | |
306 | /// Convert the string in range [\a begin, \a end) to title case according to locale \a loc and return the result as a new string | |
307 | /// | |
308 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
309 | /// | |
310 | template<typename CharType> | |
311 | std::basic_string<CharType> to_title(CharType const *begin,CharType const *end,std::locale const &loc=std::locale()) | |
312 | { | |
313 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,begin,end); | |
314 | } | |
315 | ||
316 | /////////////////////////////////////////////////// | |
317 | ||
318 | /// | |
319 | /// Fold the case of the string \a str according to locale \a loc and return the result as a new string | |
320 | /// | |
321 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
322 | /// | |
323 | ||
324 | template<typename CharType> | |
325 | std::basic_string<CharType> fold_case(std::basic_string<CharType> const &str,std::locale const &loc=std::locale()) | |
326 | { | |
327 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,str.data(),str.data()+str.size()); | |
328 | } | |
329 | ||
330 | /// | |
331 | /// Fold the case of the NUL terminated string \a str according to locale \a loc; the end of the string is located by scanning for the terminating NUL | |
332 | /// | |
333 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
334 | /// | |
335 | template<typename CharType> | |
336 | std::basic_string<CharType> fold_case(CharType const *str,std::locale const &loc=std::locale()) | |
337 | { | |
338 | CharType const *end=str; | |
339 | while(*end) | |
340 | end++; | |
341 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,str,end); | |
342 | } | |
343 | ||
344 | /// | |
345 | /// Fold the case of the string in range [\a begin, \a end) according to locale \a loc and return the result as a new string | |
346 | /// | |
347 | /// \note Throws std::bad_cast if \a loc does not have a \ref converter facet installed | |
348 | /// | |
349 | template<typename CharType> | |
350 | std::basic_string<CharType> fold_case(CharType const *begin,CharType const *end,std::locale const &loc=std::locale()) | |
351 | { | |
352 | return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,begin,end); | |
353 | } | |
354 | ||
355 | /// | |
356 | ///@} | |
357 | /// | |
358 | } // locale | |
359 | ||
360 | } // boost | |
361 | ||
362 | #ifdef BOOST_MSVC | |
363 | #pragma warning(pop) | |
364 | #endif | |
365 | ||
366 | ||
367 | #endif | |
368 | ||
369 | /// | |
370 | /// \example conversions.cpp | |
371 | /// | |
372 | /// Example of using various text conversion functions. | |
373 | /// | |
374 | /// \example wconversions.cpp | |
375 | /// | |
376 | /// Example of using various text conversion functions with wide strings. | |
377 | /// | |
378 | ||
379 | // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 | |
380 |