]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/locale/src/shared/message.cpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / boost / libs / locale / src / shared / message.cpp
1 //
2 // Copyright (c) 2009-2015 Artyom Beilis (Tonkikh)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #define BOOST_LOCALE_SOURCE
9 #define BOOST_DETAIL_NO_CONTAINER_FWD
10 #include <boost/config.hpp>
11 #include <boost/version.hpp>
12 #include <boost/locale/message.hpp>
13 #include <boost/locale/gnu_gettext.hpp>
14 #include <boost/shared_ptr.hpp>
15 #include <boost/locale/encoding.hpp>
16 #ifdef BOOST_MSVC
17 # pragma warning(disable : 4996)
18 #endif
19
20
21 #if BOOST_VERSION >= 103600
22 #define BOOST_LOCALE_UNORDERED_CATALOG
23 #endif
24
25 #ifdef BOOST_LOCALE_UNORDERED_CATALOG
26 #include <boost/unordered_map.hpp>
27 #else
28 #include <map>
29 #endif
30
31 #include <iostream>
32
33
34 #include "mo_hash.hpp"
35 #include "mo_lambda.hpp"
36
37 #include <stdio.h>
38
39 #include <string.h>
40
41 namespace boost {
42 namespace locale {
43 namespace gnu_gettext {
44
/// Small owning wrapper around a C stdio stream.
///
/// The stream is closed when the wrapper is destroyed and before another
/// file is opened through it. The copy operations are declared private
/// so the handle cannot be duplicated.
class c_file {
    c_file(c_file const &);
    void operator=(c_file const &);
public:

    /// The owned stream, or 0 when nothing is open.
    FILE *file;

    c_file() : file(0)
    {
    }

    ~c_file()
    {
        close();
    }

    /// Close the owned stream (if any) and reset the handle to 0.
    void close()
    {
        if(!file)
            return;
        fclose(file);
        file = 0;
    }

#if defined(BOOST_WINDOWS)

    /// Open file_name for binary reading; encoding names the character
    /// set of file_name. Returns true when the stream could be opened.
    ///
    /// On Windows the name is converted to a wide string and opened with
    /// _wfopen so that paths containing Unicode characters are reachable.
    /// stdio is used because not every standard C++ library offers the
    /// nonstandard std::istream::open(wchar_t const *).
    bool open(std::string const &file_name,std::string const &encoding)
    {
        close();

        std::wstring const wide_name = conv::to_utf<wchar_t>(file_name,encoding);
        file = _wfopen(wide_name.c_str(),L"rb");

        return file != 0;
    }

#else // non-Windows: the file name is passed to fopen unchanged

    /// Open file_name for binary reading. The encoding argument is
    /// ignored because the name is used in its native encoding.
    /// Returns true when the stream could be opened.
    bool open(std::string const &file_name,std::string const &/* encoding */)
    {
        close();

        file = fopen(file_name.c_str(),"rb");

        return file != 0;
    }

#endif

};
105
106 class mo_file {
107 public:
108 typedef std::pair<char const *,char const *> pair_type;
109
110 mo_file(std::vector<char> &file) :
111 native_byteorder_(true),
112 size_(0)
113 {
114 load_file(file);
115 init();
116 }
117
118 mo_file(FILE *file) :
119 native_byteorder_(true),
120 size_(0)
121 {
122 load_file(file);
123 init();
124 }
125
126 pair_type find(char const *context_in,char const *key_in) const
127 {
128 pair_type null_pair((char const *)0,(char const *)0);
129 if(hash_size_==0)
130 return null_pair;
131 uint32_t hkey = 0;
132 if(context_in == 0)
133 hkey = pj_winberger_hash_function(key_in);
134 else {
135 pj_winberger_hash::state_type st = pj_winberger_hash::initial_state;
136 st = pj_winberger_hash::update_state(st,context_in);
137 st = pj_winberger_hash::update_state(st,'\4'); // EOT
138 st = pj_winberger_hash::update_state(st,key_in);
139 hkey = st;
140 }
141 uint32_t incr = 1 + hkey % (hash_size_-2);
142 hkey %= hash_size_;
143 uint32_t orig=hkey;
144
145
146 do {
147 uint32_t idx = get(hash_offset_ + 4*hkey);
148 /// Not found
149 if(idx == 0)
150 return null_pair;
151 /// If equal values return translation
152 if(key_equals(key(idx-1),context_in,key_in))
153 return value(idx-1);
154 /// Rehash
155 hkey=(hkey + incr) % hash_size_;
156 } while(hkey!=orig);
157 return null_pair;
158 }
159
160 static bool key_equals(char const *real_key,char const *cntx,char const *key)
161 {
162 if(cntx == 0)
163 return strcmp(real_key,key) == 0;
164 else {
165 size_t real_len = strlen(real_key);
166 size_t cntx_len = strlen(cntx);
167 size_t key_len = strlen(key);
168 if(cntx_len + 1 + key_len != real_len)
169 return false;
170 return
171 memcmp(real_key,cntx,cntx_len) == 0
172 && real_key[cntx_len] == '\4'
173 && memcmp(real_key + cntx_len + 1 ,key,key_len) == 0;
174 }
175 }
176
177 char const *key(int id) const
178 {
179 uint32_t off = get(keys_offset_ + id*8 + 4);
180 return data_ + off;
181 }
182
183 pair_type value(int id) const
184 {
185 uint32_t len = get(translations_offset_ + id*8);
186 uint32_t off = get(translations_offset_ + id*8 + 4);
187 if(off >= file_size_ || off + len >= file_size_)
188 throw std::runtime_error("Bad mo-file format");
189 return pair_type(&data_[off],&data_[off]+len);
190 }
191
192 bool has_hash() const
193 {
194 return hash_size_ != 0;
195 }
196
197 size_t size() const
198 {
199 return size_;
200 }
201
202 bool empty()
203 {
204 return size_ == 0;
205 }
206
207 private:
208 void init()
209 {
210 // Read all format sizes
211 size_=get(8);
212 keys_offset_=get(12);
213 translations_offset_=get(16);
214 hash_size_=get(20);
215 hash_offset_=get(24);
216 }
217
218 void load_file(std::vector<char> &data)
219 {
220 vdata_.swap(data);
221 file_size_ = vdata_.size();
222 data_ = &vdata_[0];
223 if(file_size_ < 4 )
224 throw std::runtime_error("invalid 'mo' file format - the file is too short");
225 uint32_t magic=0;
226 memcpy(&magic,data_,4);
227 if(magic == 0x950412de)
228 native_byteorder_ = true;
229 else if(magic == 0xde120495)
230 native_byteorder_ = false;
231 else
232 throw std::runtime_error("Invalid file format - invalid magic number");
233 }
234
235 void load_file(FILE *file)
236 {
237 uint32_t magic=0;
238 // if the size is wrong magic would be wrong
239 // ok to ingnore fread result
240 size_t four_bytes = fread(&magic,4,1,file);
241 (void)four_bytes; // shut GCC
242
243 if(magic == 0x950412de)
244 native_byteorder_ = true;
245 else if(magic == 0xde120495)
246 native_byteorder_ = false;
247 else
248 throw std::runtime_error("Invalid file format");
249
250 fseek(file,0,SEEK_END);
251 long len=ftell(file);
252 if(len < 0) {
253 throw std::runtime_error("Wrong file object");
254 }
255 fseek(file,0,SEEK_SET);
256 vdata_.resize(len+1,0); // +1 to make sure the vector is not empty
257 if(fread(&vdata_.front(),1,len,file)!=unsigned(len))
258 throw std::runtime_error("Failed to read file");
259 data_ = &vdata_[0];
260 file_size_ = len;
261 }
262
263 uint32_t get(unsigned offset) const
264 {
265 uint32_t tmp;
266 if(offset > file_size_ - 4) {
267 throw std::runtime_error("Bad mo-file format");
268 }
269 memcpy(&tmp,data_ + offset,4);
270 convert(tmp);
271 return tmp;
272 }
273
274 void convert(uint32_t &v) const
275 {
276 if(native_byteorder_)
277 return;
278 v = ((v & 0xFF) << 24)
279 | ((v & 0xFF00) << 8)
280 | ((v & 0xFF0000) >> 8)
281 | ((v & 0xFF000000) >> 24);
282 }
283
284 uint32_t keys_offset_;
285 uint32_t translations_offset_;
286 uint32_t hash_size_;
287 uint32_t hash_offset_;
288
289 char const *data_;
290 size_t file_size_;
291 std::vector<char> vdata_;
292 bool native_byteorder_;
293 size_t size_;
294 };
295
296 template<typename CharType>
297 struct mo_file_use_traits {
298 static const bool in_use = false;
299 typedef CharType char_type;
300 typedef std::pair<char_type const *,char_type const *> pair_type;
301 static pair_type use(mo_file const &/*mo*/,char_type const * /*context*/,char_type const * /*key*/)
302 {
303 return pair_type((char_type const *)(0),(char_type const *)(0));
304 }
305 };
306
307 template<>
308 struct mo_file_use_traits<char> {
309 static const bool in_use = true;
310 typedef char char_type;
311 typedef std::pair<char_type const *,char_type const *> pair_type;
312 static pair_type use(mo_file const &mo,char const *context,char const *key)
313 {
314 return mo.find(context,key);
315 }
316 };
317
318 template<typename CharType>
319 class converter {
320 public:
321 converter(std::string /*out_enc*/,std::string in_enc) :
322 in_(in_enc)
323 {
324 }
325
326 std::basic_string<CharType> operator()(char const *begin,char const *end)
327 {
328 return conv::to_utf<CharType>(begin,end,in_,conv::stop);
329 }
330
331 private:
332 std::string in_;
333 };
334
335 template<>
336 class converter<char> {
337 public:
338 converter(std::string out_enc,std::string in_enc) :
339 out_(out_enc),
340 in_(in_enc)
341 {
342 }
343
344 std::string operator()(char const *begin,char const *end)
345 {
346 return conv::between(begin,end,out_,in_,conv::stop);
347 }
348
349 private:
350 std::string out_,in_;
351 };
352
/// Catalog key made of an optional context and a message id.
///
/// A key either owns its strings (built from one string in which the
/// character with value 4 separates context from id) or merely points at
/// caller-provided NUL terminated strings (built from two pointers).
/// Both flavours compare equal when their contents match.
template<typename CharType>
struct message_key {
    typedef CharType char_type;
    typedef std::basic_string<char_type> string_type;

    /// Owning key: split c at the first character with value 4; without
    /// such a character the whole string is the id and the context is empty.
    message_key(string_type const &c = string_type()) :
        c_context_(0),
        c_key_(0)
    {
        size_t const separator = c.find(char_type(4));
        if(separator != string_type::npos) {
            context_ = c.substr(0,separator);
            key_ = c.substr(separator+1);
        }
        else {
            key_ = c;
        }
    }

    /// Non-owning key: c may be 0, which stands for the empty context.
    message_key(char_type const *c,char_type const *k) :
        c_key_(k)
    {
        static const char_type empty = 0;
        c_context_ = c ? c : &empty;
    }

    /// Order by context first, then by id.
    bool operator < (message_key const &other) const
    {
        int const by_context = compare(context(),other.context());
        if(by_context == 0)
            return compare(key(),other.key()) < 0;
        return by_context < 0;
    }

    bool operator==(message_key const &other) const
    {
        if(compare(context(),other.context()) != 0)
            return false;
        return compare(key(),other.key()) == 0;
    }

    bool operator!=(message_key const &other) const
    {
        return !(*this==other);
    }

    /// Context as a NUL terminated string (borrowed pointer wins if set).
    char_type const *context() const
    {
        return c_context_ ? c_context_ : context_.c_str();
    }

    /// Message id as a NUL terminated string (borrowed pointer wins if set).
    char_type const *key() const
    {
        return c_key_ ? c_key_ : key_.c_str();
    }

private:
    /// Three-way comparison of two NUL terminated strings using
    /// std::char_traits<char_type>::lt; returns -1, 0 or 1.
    static int compare(char_type const *l,char_type const *r)
    {
        typedef std::char_traits<char_type> traits_type;
        while(*l != 0 || *r != 0) {
            if(traits_type::lt(*l,*r))
                return -1;
            if(traits_type::lt(*r,*l))
                return 1;
            ++l;
            ++r;
        }
        return 0;
    }

    string_type context_;
    string_type key_;
    char_type const *c_context_;
    char_type const *c_key_;
};
429
430 template<typename CharType>
431 struct hash_function {
432 size_t operator()(message_key<CharType> const &msg) const
433 {
434 pj_winberger_hash::state_type state = pj_winberger_hash::initial_state;
435 CharType const *p = msg.context();
436 if(*p != 0) {
437 CharType const *e = p;
438 while(*e)
439 e++;
440 state = pj_winberger_hash::update_state(state,
441 reinterpret_cast<char const *>(p),
442 reinterpret_cast<char const *>(e));
443 state = pj_winberger_hash::update_state(state,'\4');
444 }
445 p = msg.key();
446 CharType const *e = p;
447 while(*e)
448 e++;
449 state = pj_winberger_hash::update_state(state,
450 reinterpret_cast<char const *>(p),
451 reinterpret_cast<char const *>(e));
452 return state;
453 }
454 };
455
456
// Generic case (used for the wide character types): no conversion is
// required, so the message is handed back untouched and every other
// argument is ignored.
template<typename CharType>
CharType const *runtime_conversion(CharType const *msg,
                                   std::basic_string<CharType> & /*buffer*/,
                                   bool /*do_conversion*/,
                                   std::string const & /*locale_encoding*/,
                                   std::string const & /*key_encoding*/)
{
    CharType const *const unchanged = msg;
    return unchanged;
}
467
468 // But still need to specialize for char
469 template<>
470 char const *runtime_conversion( char const *msg,
471 std::string &buffer,
472 bool do_conversion,
473 std::string const &locale_encoding,
474 std::string const &key_encoding)
475 {
476 if(!do_conversion)
477 return msg;
478 if(details::is_us_ascii_string(msg))
479 return msg;
480 std::string tmp = conv::between(msg,locale_encoding,key_encoding,conv::skip);
481 buffer.swap(tmp);
482 return buffer.c_str();
483 }
484
485 template<typename CharType>
486 class mo_message : public message_format<CharType> {
487
488 typedef CharType char_type;
489 typedef std::basic_string<CharType> string_type;
490 typedef message_key<CharType> key_type;
491 #ifdef BOOST_LOCALE_UNORDERED_CATALOG
492 typedef boost::unordered_map<key_type,string_type,hash_function<CharType> > catalog_type;
493 #else
494 typedef std::map<key_type,string_type> catalog_type;
495 #endif
496 typedef std::vector<catalog_type> catalogs_set_type;
497 typedef std::map<std::string,int> domains_map_type;
498 public:
499
500 typedef std::pair<CharType const *,CharType const *> pair_type;
501
502 virtual char_type const *get(int domain_id,char_type const *context,char_type const *id) const
503 {
504 return get_string(domain_id,context,id).first;
505 }
506
507 virtual char_type const *get(int domain_id,char_type const *context,char_type const *single_id,int n) const
508 {
509 pair_type ptr = get_string(domain_id,context,single_id);
510 if(!ptr.first)
511 return 0;
512 int form=0;
513 if(plural_forms_.at(domain_id))
514 form = (*plural_forms_[domain_id])(n);
515 else
516 form = n == 1 ? 0 : 1; // Fallback to english plural form
517
518 CharType const *p=ptr.first;
519 for(int i=0;p < ptr.second && i<form;i++) {
520 p=std::find(p,ptr.second,0);
521 if(p==ptr.second)
522 return 0;
523 ++p;
524 }
525 if(p>=ptr.second)
526 return 0;
527 return p;
528 }
529
530 virtual int domain(std::string const &domain) const
531 {
532 domains_map_type::const_iterator p=domains_.find(domain);
533 if(p==domains_.end())
534 return -1;
535 return p->second;
536 }
537
538 mo_message(messages_info const &inf)
539 {
540 std::string language = inf.language;
541 std::string variant = inf.variant;
542 std::string country = inf.country;
543 std::string encoding = inf.encoding;
544 std::string lc_cat = inf.locale_category;
545 std::vector<messages_info::domain> const &domains = inf.domains;
546 std::vector<std::string> const &search_paths = inf.paths;
547
548 //
549 // List of fallbacks: en_US@euro, en@euro, en_US, en.
550 //
551 std::vector<std::string> paths;
552
553
554 if(!variant.empty() && !country.empty())
555 paths.push_back(language + "_" + country + "@" + variant);
556
557 if(!variant.empty())
558 paths.push_back(language + "@" + variant);
559
560 if(!country.empty())
561 paths.push_back(language + "_" + country);
562
563 paths.push_back(language);
564
565 catalogs_.resize(domains.size());
566 mo_catalogs_.resize(domains.size());
567 plural_forms_.resize(domains.size());
568
569
570 for(unsigned id=0;id<domains.size();id++) {
571 std::string domain=domains[id].name;
572 std::string key_encoding = domains[id].encoding;
573 domains_[domain]=id;
574
575
576 bool found=false;
577 for(unsigned j=0;!found && j<paths.size();j++) {
578 for(unsigned i=0;!found && i<search_paths.size();i++) {
579 std::string full_path = search_paths[i]+"/"+paths[j]+"/" + lc_cat + "/"+domain+".mo";
580 found = load_file(full_path,encoding,key_encoding,id,inf.callback);
581 }
582 }
583 }
584 }
585
586 char_type const *convert(char_type const *msg,string_type &buffer) const
587 {
588 return runtime_conversion<char_type>(msg,buffer,key_conversion_required_,locale_encoding_,key_encoding_);
589 }
590
591 virtual ~mo_message()
592 {
593 }
594
595 private:
596 int compare_encodings(std::string const &left,std::string const &right)
597 {
598 return convert_encoding_name(left).compare(convert_encoding_name(right));
599 }
600
601 std::string convert_encoding_name(std::string const &in)
602 {
603 std::string result;
604 for(unsigned i=0;i<in.size();i++) {
605 char c=in[i];
606 if('A' <= c && c<='Z')
607 c=c-'A' + 'a';
608 else if(('a' <= c && c<='z') || ('0' <= c && c<='9'))
609 ;
610 else
611 continue;
612 result+=c;
613 }
614 return result;
615 }
616
617
618 bool load_file( std::string const &file_name,
619 std::string const &locale_encoding,
620 std::string const &key_encoding,
621 int id,
622 messages_info::callback_type const &callback)
623 {
624 locale_encoding_ = locale_encoding;
625 key_encoding_ = key_encoding;
626
627 key_conversion_required_ = sizeof(CharType) == 1
628 && compare_encodings(locale_encoding,key_encoding)!=0;
629
630 std::auto_ptr<mo_file> mo;
631
632 if(callback) {
633 std::vector<char> vfile = callback(file_name,locale_encoding);
634 if(vfile.empty())
635 return false;
636 mo.reset(new mo_file(vfile));
637 }
638 else {
639 c_file the_file;
640 the_file.open(file_name,locale_encoding);
641 if(!the_file.file)
642 return false;
643 mo.reset(new mo_file(the_file.file));
644 }
645
646 std::string plural = extract(mo->value(0).first,"plural=","\r\n;");
647
648 std::string mo_encoding = extract(mo->value(0).first,"charset="," \r\n;");
649
650 if(mo_encoding.empty())
651 throw std::runtime_error("Invalid mo-format, encoding is not specified");
652
653 if(!plural.empty()) {
654 std::auto_ptr<lambda::plural> ptr=lambda::compile(plural.c_str());
655 plural_forms_[id] = ptr;
656 }
657
658 if( mo_useable_directly(mo_encoding,*mo) )
659 {
660 mo_catalogs_[id]=mo;
661 }
662 else {
663 converter<CharType> cvt_value(locale_encoding,mo_encoding);
664 converter<CharType> cvt_key(key_encoding,mo_encoding);
665 for(unsigned i=0;i<mo->size();i++) {
666 char const *ckey = mo->key(i);
667 string_type skey = cvt_key(ckey,ckey+strlen(ckey));
668 key_type key(skey);
669
670 mo_file::pair_type tmp = mo->value(i);
671 string_type value = cvt_value(tmp.first,tmp.second);
672 catalogs_[id][key].swap(value);
673 }
674 }
675 return true;
676
677 }
678
679 // Check if the mo file as-is is useful
680 // 1. It is char and not wide character
681 // 2. The locale encoding and mo encoding is same
682 // 3. The source strings encoding and mo encoding is same or all
683 // mo key strings are US-ASCII
684 bool mo_useable_directly( std::string const &mo_encoding,
685 mo_file const &mo)
686 {
687 if(sizeof(CharType) != 1)
688 return false;
689 if(!mo.has_hash())
690 return false;
691 if(compare_encodings(mo_encoding.c_str(),locale_encoding_.c_str())!=0)
692 return false;
693 if(compare_encodings(mo_encoding.c_str(),key_encoding_.c_str())==0) {
694 return true;
695 }
696 for(unsigned i=0;i<mo.size();i++) {
697 if(!details::is_us_ascii_string(mo.key(i))) {
698 return false;
699 }
700 }
701 return true;
702 }
703
704
705
706 static std::string extract(std::string const &meta,std::string const &key,char const *separator)
707 {
708 size_t pos=meta.find(key);
709 if(pos == std::string::npos)
710 return "";
711 pos+=key.size(); /// size of charset=
712 size_t end_pos = meta.find_first_of(separator,pos);
713 return meta.substr(pos,end_pos - pos);
714 }
715
716
717
718
719 pair_type get_string(int domain_id,char_type const *context,char_type const *in_id) const
720 {
721 pair_type null_pair((CharType const *)0,(CharType const *)0);
722 if(domain_id < 0 || size_t(domain_id) >= catalogs_.size())
723 return null_pair;
724 if(mo_file_use_traits<char_type>::in_use && mo_catalogs_[domain_id]) {
725 return mo_file_use_traits<char_type>::use(*mo_catalogs_[domain_id],context,in_id);
726 }
727 else {
728 key_type key(context,in_id);
729 catalog_type const &cat = catalogs_[domain_id];
730 typename catalog_type::const_iterator p = cat.find(key);
731 if(p==cat.end()) {
732 return null_pair;
733 }
734 return pair_type(p->second.data(),p->second.data()+p->second.size());
735 }
736 }
737
738 catalogs_set_type catalogs_;
739 std::vector<boost::shared_ptr<mo_file> > mo_catalogs_;
740 std::vector<boost::shared_ptr<lambda::plural> > plural_forms_;
741 domains_map_type domains_;
742
743 std::string locale_encoding_;
744 std::string key_encoding_;
745 bool key_conversion_required_;
746 };
747
748 template<>
749 message_format<char> *create_messages_facet(messages_info const &info)
750 {
751 return new mo_message<char>(info);
752 }
753
754 template<>
755 message_format<wchar_t> *create_messages_facet(messages_info const &info)
756 {
757 return new mo_message<wchar_t>(info);
758 }
759
760 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
761
762 template<>
763 message_format<char16_t> *create_messages_facet(messages_info const &info)
764 {
765 return new mo_message<char16_t>(info);
766 }
767 #endif
768
769 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
770
771 template<>
772 message_format<char32_t> *create_messages_facet(messages_info const &info)
773 {
774 return new mo_message<char32_t>(info);
775 }
776 #endif
777
778
779 } /// gnu_gettext
780
781 } // locale
782 } // boost
783 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
784