]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/boost/locale/util.hpp
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / boost / boost / locale / util.hpp
CommitLineData
7c673cae
FG
1//
2// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3//
4// Distributed under the Boost Software License, Version 1.0. (See
5// accompanying file LICENSE_1_0.txt or copy at
6// http://www.boost.org/LICENSE_1_0.txt)
7//
8#ifndef BOOST_LOCALE_UTIL_HPP
9#define BOOST_LOCALE_UTIL_HPP
10#include <locale>
11#include <typeinfo>
12#include <boost/cstdint.hpp>
13#include <boost/locale/utf.hpp>
14#include <boost/locale/generator.hpp>
15#include <boost/assert.hpp>
16
17#include <vector>
18namespace boost {
19namespace locale {
20///
21 /// \brief This namespace provides various utility functions useful for Boost.Locale backend
22 /// implementations
23///
24namespace util {
25
26 ///
27 /// \brief Return default system locale name in POSIX format.
28 ///
29 /// This function tries to detect the locale using the LC_CTYPE, LC_ALL and LANG environment
30 /// variables, in this order; if all of them are unset, on POSIX platforms it returns "C".
31 ///
32 /// On Windows, in addition to checking the above environment variables, this function
33 /// tries to create a locale name from the ISO-639 language and ISO-3166 country codes defined
34 /// for the user default locale.
35 /// If \a use_utf8_on_windows is true it sets the encoding to UTF-8; otherwise, if the system
36 /// locale supports an ANSI code-page, it uses that ANSI encoding (e.g. windows-1252), and it falls back
37 /// to the UTF-8 encoding if an ANSI code-page is not available.
38 ///
39 BOOST_LOCALE_DECL
40 std::string get_system_locale(bool use_utf8_on_windows = false);
41
42 ///
43 /// \brief Installs an information facet into locale \a in, based on the locale name \a name
44 ///
45 /// This function installs boost::locale::info facet into the locale \a in and returns
46 /// newly created locale.
47 ///
48 /// Note: all information is based only on parsing of string \a name;
49 ///
50 /// The name has the following format: language[_COUNTRY][.encoding][\@variant]
51 /// where language is an ISO-639 language code like "en" or "ru", COUNTRY is an ISO-3166
52 /// country identifier like "US" or "RU", and encoding is a character set name
53 /// like UTF-8 or ISO-8859-1. Variant is a backend-specific variant like \c euro or
54 /// calendar=hebrew.
55 ///
56 /// If some parameters are missing they are specified as blanks, default encoding
57 /// is assumed to be US-ASCII and missing language is assumed to be "C"
58 ///
59 BOOST_LOCALE_DECL
60 std::locale create_info(std::locale const &in,std::string const &name);
61
62
63 ///
64 /// \brief This class represents a simple stateless converter from UCS-4 and to UCS-4 for
65 /// each single code point
66 ///
67 /// This class is used for creation of std::codecvt facet for converting utf-16/utf-32 encoding
68 /// to encoding supported by this converter
69 ///
70 /// Please note, this converter should be fully stateless. Fully stateless means it should
71 /// never assume that it is called in any specific order on the text. Even if the
72 /// encoding itself seems to be stateless like windows-1255 or shift-jis, some
73 /// encoders (most notably iconv) can actually compose several code-points into one or
74 /// decompose them in case composite characters are found. So be very careful when implementing
75 /// these converters for a certain character set.
76 ///
77 class base_converter {
78 public:
79
80 ///
81 /// This value should be returned when an illegal input sequence or code-point is observed:
82 /// For example if a UCS-32 code-point is in the range reserved for UTF-16 surrogates
83 /// or an invalid UTF-8 sequence is found
84 ///
85 static const uint32_t illegal=utf::illegal;
86
87 ///
88 /// This value is returned in following cases: The of incomplete input sequence was found or
89 /// insufficient output buffer was provided so complete output could not be written.
90 ///
91 static const uint32_t incomplete=utf::incomplete;
92
93 virtual ~base_converter()
94 {
95 }
96 ///
97 /// Return the maximal length that one Unicode code-point can be converted to, for example
98 /// for UTF-8 it is 4, for Shift-JIS it is 2 and ISO-8859-1 is 1
99 ///
100 virtual int max_len() const
101 {
102 return 1;
103 }
104 ///
105 /// Returns true if calling the functions from_unicode, to_unicode, and max_len is thread safe.
106 ///
107 /// Rule of thumb: if this class' implementation uses simple tables that are unchanged
108 /// or is purely algorithmic like UTF-8 - so it does not share any mutable bit for
109 /// independent to_unicode, from_unicode calls, you may set it to true, otherwise,
110 /// for example if you use iconv_t descriptor or UConverter as conversion object return false,
111 /// and this object will be cloned for each use.
112 ///
113 virtual bool is_thread_safe() const
114 {
115 return false;
116 }
117 ///
118 /// Create a polymorphic copy of this object, usually called only if is_thread_safe() return false
119 ///
120 virtual base_converter *clone() const
121 {
122 BOOST_ASSERT(typeid(*this)==typeid(base_converter));
123 return new base_converter();
124 }
125
126 ///
127 /// Convert a single character starting at begin and ending at most at end to Unicode code-point.
128 ///
129 /// if valid input sequence found in [\a begin,\a code_point_end) such as \a begin < \a code_point_end && \a code_point_end <= \a end
130 /// it is converted to its Unicode code point equivalent, \a begin is set to \a code_point_end
131 ///
132 /// if incomplete input sequence found in [\a begin,\a end), i.e. there my be such \a code_point_end that \a code_point_end > \a end
133 /// and [\a begin, \a code_point_end) would be valid input sequence, then \a incomplete is returned begin stays unchanged, for example
134 /// for UTF-8 conversion a *begin = 0xc2, \a begin +1 = \a end is such situation.
135 ///
136 /// if invalid input sequence found, i.e. there is a sequence [\a begin, \a code_point_end) such as \a code_point_end <= \a end
137 /// that is illegal for this encoding, \a illegal is returned and begin stays unchanged. For example if *begin = 0xFF and begin < end
138 /// for UTF-8, then \a illegal is returned.
139 ///
140 ///
141 virtual uint32_t to_unicode(char const *&begin,char const *end)
142 {
143 if(begin == end)
144 return incomplete;
145 unsigned char cp = *begin;
146 if(cp <= 0x7F) {
147 begin++;
148 return cp;
149 }
150 return illegal;
151 }
152 ///
153 /// Convert a single code-point \a u into encoding and store it in [begin,end) range.
154 ///
155 /// If u is invalid Unicode code-point, or it can not be mapped correctly to represented character set,
156 /// \a illegal should be returned
157 ///
158 /// If u can be converted to a sequence of bytes c1, ... , cN (1<= N <= max_len() ) then
159 ///
160 /// -# If end - begin >= N, c1, ... cN are written starting at begin and N is returned
161 /// -# If end - begin < N, incomplete is returned, it is unspecified what would be
162 /// stored in bytes in range [begin,end)
163
164 virtual uint32_t from_unicode(uint32_t u,char *begin,char const *end)
165 {
166 if(begin==end)
167 return incomplete;
168 if(u >= 0x80)
169 return illegal;
170 *begin = static_cast<char>(u);
171 return 1;
172 }
173 };
174
11fdf7f2 175 #if !defined(BOOST_LOCALE_HIDE_AUTO_PTR) && !defined(BOOST_NO_AUTO_PTR)
7c673cae
FG
176 ///
177 /// This function creates a \a base_converter that can be used for conversion between UTF-8 and
178 /// unicode code points
179 ///
180 BOOST_LOCALE_DECL std::auto_ptr<base_converter> create_utf8_converter();
181 ///
182 /// This function creates a \a base_converter that can be used for conversion between single byte
183 /// character encodings like ISO-8859-1, koi8-r, windows-1255 and Unicode code points,
184 ///
185 /// If \a encoding is not supported, empty pointer is returned. You should check if
186 /// std::auto_ptr<base_converter>::get() != 0
187 ///
188 BOOST_LOCALE_DECL std::auto_ptr<base_converter> create_simple_converter(std::string const &encoding);
189
190
191 ///
192 /// Install codecvt facet into locale \a in and return new locale that is based on \a in and uses new
193 /// facet.
194 ///
195 /// codecvt facet would convert between narrow and wide/char16_t/char32_t encodings using \a cvt converter.
196 /// If \a cvt is a null pointer, an always-failing conversion is used: it fails on the very first input or output.
197 ///
198 /// Note: the codecvt facet handles both UTF-16 and UTF-32 wide encodings, it knows to break and join
199 /// Unicode code-points above 0xFFFF to and from surrogate pairs correctly. \a cvt should be unaware
200 /// of wide encoding type
201 ///
202 BOOST_LOCALE_DECL
203 std::locale create_codecvt(std::locale const &in,std::auto_ptr<base_converter> cvt,character_facet_type type);
11fdf7f2
TL
204 #endif
205
206 #ifndef BOOST_NO_CXX11_SMART_PTR
207 ///
208 /// This function creates a \a base_converter that can be used for conversion between UTF-8 and
209 /// unicode code points
210 ///
211 BOOST_LOCALE_DECL std::unique_ptr<base_converter> create_utf8_converter_unique_ptr();
212 ///
213 /// This function creates a \a base_converter that can be used for conversion between single byte
214 /// character encodings like ISO-8859-1, koi8-r, windows-1255 and Unicode code points,
215 ///
216 /// If \a encoding is not supported, empty pointer is returned. You should check if
217 /// std::unique_ptr<base_converter>::get() != 0
218 ///
219 BOOST_LOCALE_DECL std::unique_ptr<base_converter> create_simple_converter_unique_ptr(std::string const &encoding);
220
221 ///
222 /// Install codecvt facet into locale \a in and return new locale that is based on \a in and uses new
223 /// facet.
224 ///
225 /// codecvt facet would convert between narrow and wide/char16_t/char32_t encodings using \a cvt converter.
226 /// If \a cvt is a null pointer, an always-failing conversion is used: it fails on the very first input or output.
227 ///
228 /// Note: the codecvt facet handles both UTF-16 and UTF-32 wide encodings, it knows to break and join
229 /// Unicode code-points above 0xFFFF to and from surrogate pairs correctly. \a cvt should be unaware
230 /// of wide encoding type
231 ///
232 BOOST_LOCALE_DECL
233 std::locale create_codecvt(std::locale const &in,std::unique_ptr<base_converter> cvt,character_facet_type type);
234 #endif
235
236 ///
237 /// This function creates a \a base_converter that can be used for conversion between UTF-8 and
238 /// unicode code points
239 ///
240 BOOST_LOCALE_DECL base_converter *create_utf8_converter_new_ptr();
241 ///
242 /// This function creates a \a base_converter that can be used for conversion between single byte
243 /// character encodings like ISO-8859-1, koi8-r, windows-1255 and Unicode code points,
244 ///
245 /// If \a encoding is not supported, a null pointer is returned. You should check that
246 /// the returned pointer is not null before using it.
247 ///
248 BOOST_LOCALE_DECL base_converter *create_simple_converter_new_ptr(std::string const &encoding);
249
250 ///
251 /// Install codecvt facet into locale \a in and return new locale that is based on \a in and uses new
252 /// facet.
253 ///
254 /// codecvt facet would convert between narrow and wide/char16_t/char32_t encodings using \a cvt converter.
255 /// If \a cvt is a null pointer, an always-failing conversion is used: it fails on the very first input or output.
256 ///
257 /// Note: the codecvt facet handles both UTF-16 and UTF-32 wide encodings, it knows to break and join
258 /// Unicode code-points above 0xFFFF to and from surrogate pairs correctly. \a cvt should be unaware
259 /// of wide encoding type
260 ///
261 /// Ownership of \a cvt is transferred.
262 ///
263 BOOST_LOCALE_DECL
264 std::locale create_codecvt_from_pointer(std::locale const &in,base_converter *cvt,character_facet_type type);
7c673cae
FG
265
266 ///
267 /// Install utf8 codecvt to UTF-16 or UTF-32 into locale \a in and return
268 /// new locale that is based on \a in and uses new facet.
269 ///
270 BOOST_LOCALE_DECL
271 std::locale create_utf8_codecvt(std::locale const &in,character_facet_type type);
272
273 ///
274 /// This function installs codecvt that can be used for conversion between single byte
275 /// character encodings like ISO-8859-1, koi8-r, windows-1255 and Unicode code points,
276 ///
277 /// Throws boost::locale::conv::invalid_charset_error if the character set is not supported or isn't a single byte character
278 /// set
279 BOOST_LOCALE_DECL
280 std::locale create_simple_codecvt(std::locale const &in,std::string const &encoding,character_facet_type type);
281} // util
282} // locale
283} // boost
284
285#endif
286// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4