]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/locale/src/util/codecvt_converter.cpp
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / boost / libs / locale / src / util / codecvt_converter.cpp
CommitLineData
7c673cae
FG
1//
2// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3//
4// Distributed under the Boost Software License, Version 1.0. (See
5// accompanying file LICENSE_1_0.txt or copy at
6// http://www.boost.org/LICENSE_1_0.txt)
7//
8#define BOOST_LOCALE_SOURCE
9#include <boost/locale/generator.hpp>
10#include <boost/locale/encoding.hpp>
11#include <boost/locale/utf8_codecvt.hpp>
12
13#include "../encoding/conv.hpp"
14
15#include <boost/locale/util.hpp>
16
17#ifdef BOOST_MSVC
18# pragma warning(disable : 4244 4996) // 4244: possible loss of data, 4996: deprecated functions
19#endif
20
21#include <cstddef>
22#include <string.h>
23#include <vector>
24#include <algorithm>
25
26//#define DEBUG_CODECVT
27
28#ifdef DEBUG_CODECVT
29#include <iostream>
30#endif
31
32namespace boost {
33namespace locale {
34namespace util {
35
36 class utf8_converter : public base_converter {
37 public:
38 virtual int max_len() const
39 {
40 return 4;
41 }
42
43 virtual utf8_converter *clone() const
44 {
45 return new utf8_converter();
46 }
47
48 bool is_thread_safe() const
49 {
50 return true;
51 }
52
53 virtual uint32_t to_unicode(char const *&begin,char const *end)
54 {
55 char const *p=begin;
56
57 utf::code_point c = utf::utf_traits<char>::decode(p,end);
58
59 if(c==utf::illegal)
60 return illegal;
61
62 if(c==utf::incomplete)
63 return incomplete;
64
65 begin = p;
66 return c;
67 }
68
69 virtual uint32_t from_unicode(uint32_t u,char *begin,char const *end)
70 {
71 if(!utf::is_valid_codepoint(u))
72 return illegal;
73 int width = utf::utf_traits<char>::width(u);
74 std::ptrdiff_t d=end-begin;
75 if(d < width)
76 return incomplete;
77 utf::utf_traits<char>::encode(u,begin);
78 return width;
79 }
80 }; // utf8_converter
81
82 class simple_converter_impl {
83 public:
84
85 static const int hash_table_size = 1024;
86
87 simple_converter_impl(std::string const &encoding)
88 {
89 for(unsigned i=0;i<128;i++)
90 to_unicode_tbl_[i]=i;
91 for(unsigned i=128;i<256;i++) {
92 char buf[2] = { char(i) , 0 };
93 uint32_t uchar=utf::illegal;
94 try {
95 std::wstring const tmp = conv::to_utf<wchar_t>(buf,buf+1,encoding,conv::stop);
96 if(tmp.size() == 1) {
97 uchar = tmp[0];
98 }
99 else {
100 uchar = utf::illegal;
101 }
102 }
103 catch(conv::conversion_error const &/*e*/) {
104 uchar = utf::illegal;
105 }
106 to_unicode_tbl_[i]=uchar;
107 }
108 for(int i=0;i<hash_table_size;i++)
109 from_unicode_tbl_[i]=0;
110 for(unsigned i=1;i<256;i++) {
111 if(to_unicode_tbl_[i]!=utf::illegal) {
112 unsigned pos = to_unicode_tbl_[i] % hash_table_size;
113 while(from_unicode_tbl_[pos]!=0)
114 pos = (pos + 1) % hash_table_size;
115 from_unicode_tbl_[pos] = i;
116 }
117 }
118 }
119
120 uint32_t to_unicode(char const *&begin,char const *end) const
121 {
122 if(begin==end)
123 return utf::incomplete;
124 unsigned char c = *begin++;
125 return to_unicode_tbl_[c];
126 }
127 uint32_t from_unicode(uint32_t u,char *begin,char const *end) const
128 {
129 if(begin==end)
130 return utf::incomplete;
131 if(u==0) {
132 *begin = 0;
133 return 1;
134 }
135 unsigned pos = u % hash_table_size;
136 unsigned char c;
137 while((c=from_unicode_tbl_[pos])!=0 && to_unicode_tbl_[c]!=u)
138 pos = (pos + 1) % hash_table_size;
139 if(c==0)
140 return utf::illegal;
141 *begin = c;
142 return 1;
143 }
144 private:
145 uint32_t to_unicode_tbl_[256];
146 unsigned char from_unicode_tbl_[hash_table_size];
147 };
148
149 class simple_converter : public base_converter {
150 public:
151
152 virtual ~simple_converter()
153 {
154 }
155
156 simple_converter(std::string const &encoding) :
157 cvt_(encoding)
158 {
159 }
160
161 virtual int max_len() const
162 {
163 return 1;
164 }
165
166 virtual bool is_thread_safe() const
167 {
168 return true;
169 }
170 virtual base_converter *clone() const
171 {
172 return new simple_converter(*this);
173 }
174
175 virtual uint32_t to_unicode(char const *&begin,char const *end)
176 {
177 return cvt_.to_unicode(begin,end);
178 }
179 virtual uint32_t from_unicode(uint32_t u,char *begin,char const *end)
180 {
181 return cvt_.from_unicode(u,begin,end);
182 }
183 private:
184 simple_converter_impl cvt_;
185 };
186
187 template<typename CharType>
188 class simple_codecvt : public generic_codecvt<CharType,simple_codecvt<CharType> >
189 {
190 public:
191
192 simple_codecvt(std::string const &encoding,size_t refs = 0) :
193 generic_codecvt<CharType,simple_codecvt<CharType> >(refs),
194 cvt_(encoding)
195 {
196 }
197
198 struct state_type {};
199 static state_type initial_state(generic_codecvt_base::initial_convertion_state /* unused */)
200 {
201 return state_type();
202 }
203 static int max_encoding_length()
204 {
205 return 1;
206 }
207
208 utf::code_point to_unicode(state_type &,char const *&begin,char const *end) const
209 {
210 return cvt_.to_unicode(begin,end);
211 }
212
213 utf::code_point from_unicode(state_type &,utf::code_point u,char *begin,char const *end) const
214 {
215 return cvt_.from_unicode(u,begin,end);
216 }
217 private:
218 simple_converter_impl cvt_;
219
220 };
221
namespace {
    // Encoding names accepted as "simple" (single byte) encodings.
    // The entries must stay sorted according to compare_strings() because
    // the table is searched with std::binary_search.
    char const *simple_encoding_table[] = {
        "cp1250", "cp1251", "cp1252", "cp1253",
        "cp1254", "cp1255", "cp1256", "cp1257",
        "iso88591", "iso885913", "iso885915",
        "iso88592", "iso88593", "iso88594", "iso88595",
        "iso88596", "iso88597", "iso88598", "iso88599",
        "koi8r", "koi8u",
        "usascii",
        "windows1250", "windows1251", "windows1252", "windows1253",
        "windows1254", "windows1255", "windows1256", "windows1257"
    };

    // Strict "less than" on C strings, based on strcmp ordering.
    bool compare_strings(char const *l,char const *r)
    {
        int const order = strcmp(l,r);
        return order < 0;
    }
}
261
262 bool check_is_simple_encoding(std::string const &encoding)
263 {
264 std::string norm = conv::impl::normalize_encoding(encoding.c_str());
265 return std::binary_search<char const **>( simple_encoding_table,
266 simple_encoding_table + sizeof(simple_encoding_table)/sizeof(char const *),
267 norm.c_str(),
268 compare_strings);
269 return 0;
270 }
271
11fdf7f2
TL
272 #if !defined(BOOST_LOCALE_HIDE_AUTO_PTR) && !defined(BOOST_NO_AUTO_PTR)
273 std::auto_ptr<base_converter> create_utf8_converter()
274 {
275 std::auto_ptr<base_converter> res(create_utf8_converter_new_ptr());
276 return res;
277 }
7c673cae
FG
278 std::auto_ptr<base_converter> create_simple_converter(std::string const &encoding)
279 {
11fdf7f2 280 std::auto_ptr<base_converter> res(create_simple_converter_new_ptr(encoding));
7c673cae
FG
281 return res;
282 }
11fdf7f2
TL
283 std::locale create_codecvt(std::locale const &in,std::auto_ptr<base_converter> cvt,character_facet_type type)
284 {
285 return create_codecvt_from_pointer(in,cvt.release(),type);
286 }
287 #endif
288 #ifndef BOOST_NO_CXX11_SMART_PTR
289 std::unique_ptr<base_converter> create_utf8_converter_unique_ptr()
290 {
291 std::unique_ptr<base_converter> res(create_utf8_converter_new_ptr());
292 return res;
293 }
294 std::unique_ptr<base_converter> create_simple_converter_unique_ptr(std::string const &encoding)
7c673cae 295 {
11fdf7f2 296 std::unique_ptr<base_converter> res(create_simple_converter_new_ptr(encoding));
7c673cae
FG
297 return res;
298 }
11fdf7f2
TL
299 std::locale create_codecvt(std::locale const &in,std::unique_ptr<base_converter> cvt,character_facet_type type)
300 {
301 return create_codecvt_from_pointer(in,cvt.release(),type);
302 }
303 #endif
304
305 base_converter *create_simple_converter_new_ptr(std::string const &encoding)
306 {
307 if(check_is_simple_encoding(encoding))
308 return new simple_converter(encoding);
309 return 0;
310 }
311
312 base_converter *create_utf8_converter_new_ptr()
313 {
314 return new utf8_converter();
315 }
7c673cae
FG
316
317 template<typename CharType>
318 class code_converter : public generic_codecvt<CharType,code_converter<CharType> >
319 {
320 public:
11fdf7f2
TL
321 #ifndef BOOST_NO_CXX11_SMART_PTR
322 typedef std::unique_ptr<base_converter> base_converter_ptr;
323 #define PTR_TRANS(x) std::move((x))
324 #else
325 typedef std::auto_ptr<base_converter> base_converter_ptr;
326 #define PTR_TRANS(x) (x)
327 #endif
328 typedef base_converter_ptr state_type;
329
330 code_converter(base_converter_ptr cvt,size_t refs = 0) :
7c673cae 331 generic_codecvt<CharType,code_converter<CharType> >(refs),
11fdf7f2 332 cvt_(PTR_TRANS(cvt))
7c673cae
FG
333 {
334 max_len_ = cvt_->max_len();
335 thread_safe_ = cvt_->is_thread_safe();
336 }
337
7c673cae
FG
338
339 int max_encoding_length() const
340 {
341 return max_len_;
342 }
343
11fdf7f2 344 base_converter_ptr initial_state(generic_codecvt_base::initial_convertion_state /* unused */) const
7c673cae 345 {
11fdf7f2 346 base_converter_ptr r;
7c673cae
FG
347 if(!thread_safe_)
348 r.reset(cvt_->clone());
349 return r;
350 }
351
11fdf7f2 352 utf::code_point to_unicode(base_converter_ptr &ptr,char const *&begin,char const *end) const
7c673cae
FG
353 {
354 if(thread_safe_)
355 return cvt_->to_unicode(begin,end);
356 else
357 return ptr->to_unicode(begin,end);
358 }
359
11fdf7f2 360 utf::code_point from_unicode(base_converter_ptr &ptr,utf::code_point u,char *begin,char const *end) const
7c673cae
FG
361 {
362 if(thread_safe_)
363 return cvt_->from_unicode(u,begin,end);
364 else
365 return ptr->from_unicode(u,begin,end);
366 }
367
368 private:
11fdf7f2 369 base_converter_ptr cvt_;
7c673cae
FG
370 int max_len_;
371 bool thread_safe_;
372 };
373
374
11fdf7f2 375 std::locale create_codecvt_from_pointer(std::locale const &in,base_converter *pcvt,character_facet_type type)
7c673cae 376 {
11fdf7f2 377 code_converter<char>::base_converter_ptr cvt(pcvt);
7c673cae
FG
378 if(!cvt.get())
379 cvt.reset(new base_converter());
380 switch(type) {
381 case char_facet:
11fdf7f2 382 return std::locale(in,new code_converter<char>(PTR_TRANS(cvt)));
7c673cae 383 case wchar_t_facet:
11fdf7f2 384 return std::locale(in,new code_converter<wchar_t>(PTR_TRANS(cvt)));
7c673cae
FG
385 #if defined(BOOST_LOCALE_ENABLE_CHAR16_T) && !defined(BOOST_NO_CHAR16_T_CODECVT)
386 case char16_t_facet:
11fdf7f2 387 return std::locale(in,new code_converter<char16_t>(PTR_TRANS(cvt)));
7c673cae
FG
388 #endif
389 #if defined(BOOST_LOCALE_ENABLE_CHAR32_T) && !defined(BOOST_NO_CHAR32_T_CODECVT)
390 case char32_t_facet:
11fdf7f2 391 return std::locale(in,new code_converter<char32_t>(PTR_TRANS(cvt)));
7c673cae
FG
392 #endif
393 default:
394 return in;
395 }
396 }
397
398
399 ///
400 /// Install utf8 codecvt to UTF-16 or UTF-32 into locale \a in and return
401 /// new locale that is based on \a in and uses new facet.
402 ///
403 std::locale create_utf8_codecvt(std::locale const &in,character_facet_type type)
404 {
405 switch(type) {
406 case char_facet:
407 return std::locale(in,new utf8_codecvt<char>());
408 case wchar_t_facet:
409 return std::locale(in,new utf8_codecvt<wchar_t>());
410 #if defined(BOOST_LOCALE_ENABLE_CHAR16_T) && !defined(BOOST_NO_CHAR16_T_CODECVT)
411 case char16_t_facet:
412 return std::locale(in,new utf8_codecvt<char16_t>());
413 #endif
414 #if defined(BOOST_LOCALE_ENABLE_CHAR32_T) && !defined(BOOST_NO_CHAR32_T_CODECVT)
415 case char32_t_facet:
416 return std::locale(in,new utf8_codecvt<char32_t>());
417 #endif
418 default:
419 return in;
420 }
421 }
422
423 ///
424 /// This function installs codecvt that can be used for conversion between single byte
425 /// character encodings like ISO-8859-1, koi8-r, windows-1255 and Unicode code points,
426 ///
427 /// Throws invalid_charset_error if the chacater set is not supported or isn't single byte character
428 /// set
429 std::locale create_simple_codecvt(std::locale const &in,std::string const &encoding,character_facet_type type)
430 {
431 if(!check_is_simple_encoding(encoding))
432 throw boost::locale::conv::invalid_charset_error("Invalid simple encoding " + encoding);
433
434 switch(type) {
435 case char_facet:
436 return std::locale(in,new simple_codecvt<char>(encoding));
437 case wchar_t_facet:
438 return std::locale(in,new simple_codecvt<wchar_t>(encoding));
439 #if defined(BOOST_LOCALE_ENABLE_CHAR16_T) && !defined(BOOST_NO_CHAR16_T_CODECVT)
440 case char16_t_facet:
441 return std::locale(in,new simple_codecvt<char16_t>(encoding));
442 #endif
443 #if defined(BOOST_LOCALE_ENABLE_CHAR32_T) && !defined(BOOST_NO_CHAR32_T_CODECVT)
444 case char32_t_facet:
445 return std::locale(in,new simple_codecvt<char32_t>(encoding));
446 #endif
447 default:
448 return in;
449 }
450 }
451
452
453
454} // util
455} // locale
456} // boost
457
458// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4