2 // Copyright (c) 2009-2015 Artyom Beilis (Tonkikh)
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
8 #define BOOST_LOCALE_SOURCE
9 #define BOOST_DETAIL_NO_CONTAINER_FWD
10 #include <boost/config.hpp>
11 #include <boost/version.hpp>
12 #include <boost/locale/message.hpp>
13 #include <boost/locale/gnu_gettext.hpp>
14 #include <boost/shared_ptr.hpp>
15 #include <boost/locale/encoding.hpp>
17 # pragma warning(disable : 4996)
21 #if BOOST_VERSION >= 103600
22 #define BOOST_LOCALE_UNORDERED_CATALOG
25 #ifdef BOOST_LOCALE_UNORDERED_CATALOG
26 #include <boost/unordered_map.hpp>
34 #include "mo_hash.hpp"
35 #include "mo_lambda.hpp"
43 namespace gnu_gettext
{
// Copy constructor and copy-assignment operator are declared here without a
// body.
// NOTE(review): presumably left undefined on purpose so that the file handle
// cannot be copied - confirm against the access specifier of the class.
46 c_file(c_file
const &);
47 void operator=(c_file
const &);
69 #if defined(BOOST_WINDOWS)
71 bool open(std::string
const &file_name
,std::string
const &encoding
)
76 // Under windows we have to use "_wfopen" to get
77 // access to path's with Unicode in them
79 // As not all standard C++ libraries support nonstandard std::istream::open(wchar_t const *)
80 // we would use old and good stdio and _wfopen CRTL functions
83 std::wstring wfile_name
= conv::to_utf
<wchar_t>(file_name
,encoding
);
84 file
= _wfopen(wfile_name
.c_str(),L
"rb");
89 #else // POSIX systems do not have all this Wide API crap, as native codepages are UTF-8
91 // We do not use encoding as we use native file name encoding
93 bool open(std::string
const &file_name
,std::string
const &/* encoding */)
97 file
= fopen(file_name
.c_str(),"rb");
108 typedef std::pair
<char const *,char const *> pair_type
;
// Constructs the catalog from an in-memory buffer passed by non-const
// reference.  The byte-order flag starts out as "native".
// NOTE(review): the remainder of the initializer list and the constructor
// body are not visible in this section.
110 mo_file(std::vector
<char> &file
) :
111 native_byteorder_(true),
// Constructs the catalog from a stdio stream.  The byte-order flag starts
// out as "native".
// NOTE(review): the remainder of the initializer list and the constructor
// body are not visible in this section.
118 mo_file(FILE *file
) :
119 native_byteorder_(true),
126 pair_type
find(char const *context_in
,char const *key_in
) const
128 pair_type
null_pair((char const *)0,(char const *)0);
133 hkey
= pj_winberger_hash_function(key_in
);
135 pj_winberger_hash::state_type st
= pj_winberger_hash::initial_state
;
136 st
= pj_winberger_hash::update_state(st
,context_in
);
137 st
= pj_winberger_hash::update_state(st
,'\4'); // EOT
138 st
= pj_winberger_hash::update_state(st
,key_in
);
141 uint32_t incr
= 1 + hkey
% (hash_size_
-2);
147 uint32_t idx
= get(hash_offset_
+ 4*hkey
);
151 /// If equal values return translation
152 if(key_equals(key(idx
-1),context_in
,key_in
))
155 hkey
=(hkey
+ incr
) % hash_size_
;
/// Tells whether the key stored in the catalog matches a (context, id) pair.
///
/// \param real_key zero terminated key as stored in the file
/// \param cntx     context to match, or 0 for a message without context
/// \param key      message identifier to match
/// \return true when \a real_key equals \a key (no context), or equals
///         \a cntx followed by the EOT character ('\4') followed by \a key
static bool key_equals(char const *real_key,char const *cntx,char const *key)
{
    // No context: the stored key is the bare message identifier.
    if(!cntx)
        return strcmp(real_key,key) == 0;

    size_t const context_size = strlen(cntx);
    size_t const id_size = strlen(key);

    // The stored key must be exactly "context" + EOT + "id" long.
    if(strlen(real_key) != context_size + 1 + id_size)
        return false;

    char const *const separator = real_key + context_size;
    if(memcmp(real_key,cntx,context_size) != 0)
        return false;
    if(*separator != '\4')
        return false;
    return memcmp(separator + 1,key,id_size) == 0;
}
177 char const *key(int id
) const
179 uint32_t off
= get(keys_offset_
+ id
*8 + 4);
183 pair_type
value(int id
) const
185 uint32_t len
= get(translations_offset_
+ id
*8);
186 uint32_t off
= get(translations_offset_
+ id
*8 + 4);
187 if(off
>= file_size_
|| off
+ len
>= file_size_
)
188 throw std::runtime_error("Bad mo-file format");
189 return pair_type(&data_
[off
],&data_
[off
]+len
);
192 bool has_hash() const
194 return hash_size_
!= 0;
210 // Read all format sizes
// The visible assignments fetch three 32-bit fields with get() at the fixed
// byte positions 12, 16 and 24 and cache them as the offsets of the key
// table, the translation table and the hash table.
// NOTE(review): the enclosing function header and any further field reads
// are not visible in this section.
212 keys_offset_
=get(12);
213 translations_offset_
=get(16);
215 hash_offset_
=get(24);
218 void load_file(std::vector
<char> &data
)
221 file_size_
= vdata_
.size();
224 throw std::runtime_error("invalid 'mo' file format - the file is too short");
226 memcpy(&magic
,data_
,4);
227 if(magic
== 0x950412de)
228 native_byteorder_
= true;
229 else if(magic
== 0xde120495)
230 native_byteorder_
= false;
232 throw std::runtime_error("Invalid file format - invalid magic number");
235 void load_file(FILE *file
)
238 // if the size is wrong magic would be wrong
239 // ok to ingnore fread result
240 size_t four_bytes
= fread(&magic
,4,1,file
);
241 (void)four_bytes
; // shut GCC
243 if(magic
== 0x950412de)
244 native_byteorder_
= true;
245 else if(magic
== 0xde120495)
246 native_byteorder_
= false;
248 throw std::runtime_error("Invalid file format");
250 fseek(file
,0,SEEK_END
);
251 long len
=ftell(file
);
253 throw std::runtime_error("Wrong file object");
255 fseek(file
,0,SEEK_SET
);
256 vdata_
.resize(len
+1,0); // +1 to make sure the vector is not empty
257 if(fread(&vdata_
.front(),1,len
,file
)!=unsigned(len
))
258 throw std::runtime_error("Failed to read file");
263 uint32_t get(unsigned offset
) const
266 if(offset
> file_size_
- 4) {
267 throw std::runtime_error("Bad mo-file format");
269 memcpy(&tmp
,data_
+ offset
,4);
274 void convert(uint32_t &v
) const
276 if(native_byteorder_
)
278 v
= ((v
& 0xFF) << 24)
279 | ((v
& 0xFF00) << 8)
280 | ((v
& 0xFF0000) >> 8)
281 | ((v
& 0xFF000000) >> 24);
284 uint32_t keys_offset_
;
285 uint32_t translations_offset_
;
287 uint32_t hash_offset_
;
291 std::vector
<char> vdata_
;
292 bool native_byteorder_
;
296 template<typename CharType
>
297 struct mo_file_use_traits
{
298 static const bool in_use
= false;
299 typedef CharType char_type
;
300 typedef std::pair
<char_type
const *,char_type
const *> pair_type
;
301 static pair_type
use(mo_file
const &/*mo*/,char_type
const * /*context*/,char_type
const * /*key*/)
303 return pair_type((char_type
const *)(0),(char_type
const *)(0));
308 struct mo_file_use_traits
<char> {
309 static const bool in_use
= true;
310 typedef char char_type
;
311 typedef std::pair
<char_type
const *,char_type
const *> pair_type
;
312 static pair_type
use(mo_file
const &mo
,char const *context
,char const *key
)
314 return mo
.find(context
,key
);
318 template<typename CharType
>
321 converter(std::string
/*out_enc*/,std::string in_enc
) :
326 std::basic_string
<CharType
> operator()(char const *begin
,char const *end
)
328 return conv::to_utf
<CharType
>(begin
,end
,in_
,conv::stop
);
336 class converter
<char> {
338 converter(std::string out_enc
,std::string in_enc
) :
344 std::string
operator()(char const *begin
,char const *end
)
346 return conv::between(begin
,end
,out_
,in_
,conv::stop
);
350 std::string out_
,in_
;
/// Key of the message catalog: a (context, message id) pair.
///
/// A key either owns its two strings (built from one combined string) or
/// merely refers to caller supplied zero terminated strings (built from two
/// pointers).  In the second case the pointers must stay valid for as long
/// as the key is used.
template<typename CharType>
struct message_key {
    typedef CharType char_type;
    typedef std::basic_string<char_type> string_type;

    /// Builds an owning key from "context EOT id" or from a bare "id";
    /// EOT is the character with code 4.
    message_key(string_type const &c = string_type()) :
        c_context_(0),
        c_key_(0)
    {
        typename string_type::size_type const separator = c.find(char_type(4));
        if(separator != string_type::npos) {
            context_.assign(c,0,separator);
            key_.assign(c,separator + 1,string_type::npos);
        }
        else {
            key_ = c;
        }
    }

    /// Builds a non-owning key; a null context \a c is treated as empty.
    message_key(char_type const *c,char_type const *k) :
        c_key_(k)
    {
        static const char_type empty = 0;
        c_context_ = c ? c : &empty;
    }

    /// Orders keys by context first and by message id second.
    bool operator < (message_key const &other) const
    {
        int const by_context = compare(context(),other.context());
        if(by_context == 0)
            return compare(key(),other.key()) < 0;
        return by_context < 0;
    }

    bool operator==(message_key const &other) const
    {
        if(compare(context(),other.context()) != 0)
            return false;
        return compare(key(),other.key()) == 0;
    }

    bool operator!=(message_key const &other) const
    {
        return !(*this==other);
    }

    /// Zero terminated context, never null.
    char_type const *context() const
    {
        return c_context_ ? c_context_ : context_.c_str();
    }

    /// Zero terminated message id.
    char_type const *key() const
    {
        return c_key_ ? c_key_ : key_.c_str();
    }

private:
    /// Three-way comparison of two zero terminated strings based on
    /// std::char_traits<char_type>::lt; returns -1, 0 or 1.
    static int compare(char_type const *l,char_type const *r)
    {
        typedef std::char_traits<char_type> traits_type;
        while(true) {
            char_type const cl = *l;
            char_type const cr = *r;
            ++l;
            ++r;
            if(cl == 0 && cr == 0)
                return 0;
            if(traits_type::lt(cl,cr))
                return -1;
            if(traits_type::lt(cr,cl))
                return 1;
        }
    }

    string_type context_;
    string_type key_;
    char_type const *c_context_;
    char_type const *c_key_;
};
430 template<typename CharType
>
431 struct hash_function
{
432 size_t operator()(message_key
<CharType
> const &msg
) const
434 pj_winberger_hash::state_type state
= pj_winberger_hash::initial_state
;
435 CharType
const *p
= msg
.context();
437 CharType
const *e
= p
;
440 state
= pj_winberger_hash::update_state(state
,
441 reinterpret_cast<char const *>(p
),
442 reinterpret_cast<char const *>(e
));
443 state
= pj_winberger_hash::update_state(state
,'\4');
446 CharType
const *e
= p
;
449 state
= pj_winberger_hash::update_state(state
,
450 reinterpret_cast<char const *>(p
),
451 reinterpret_cast<char const *>(e
));
// By default, for wide types, no conversion is required: the message is
// handed back unchanged and none of the other arguments is used.
template<typename CharType>
CharType const *runtime_conversion(CharType const *msg,
                                   std::basic_string<CharType> &/*buffer*/,
                                   bool /*do_conversion*/,
                                   std::string const &/*locale_encoding*/,
                                   std::string const &/*key_encoding*/)
{
    CharType const *const unchanged = msg;
    return unchanged;
}
468 // But still need to specialize for char
470 char const *runtime_conversion( char const *msg
,
473 std::string
const &locale_encoding
,
474 std::string
const &key_encoding
)
478 if(details::is_us_ascii_string(msg
))
480 std::string tmp
= conv::between(msg
,locale_encoding
,key_encoding
,conv::skip
);
482 return buffer
.c_str();
485 template<typename CharType
>
486 class mo_message
: public message_format
<CharType
> {
488 typedef CharType char_type
;
489 typedef std::basic_string
<CharType
> string_type
;
490 typedef message_key
<CharType
> key_type
;
491 #ifdef BOOST_LOCALE_UNORDERED_CATALOG
492 typedef boost::unordered_map
<key_type
,string_type
,hash_function
<CharType
> > catalog_type
;
494 typedef std::map
<key_type
,string_type
> catalog_type
;
496 typedef std::vector
<catalog_type
> catalogs_set_type
;
497 typedef std::map
<std::string
,int> domains_map_type
;
500 typedef std::pair
<CharType
const *,CharType
const *> pair_type
;
502 virtual char_type
const *get(int domain_id
,char_type
const *context
,char_type
const *id
) const
504 return get_string(domain_id
,context
,id
).first
;
507 virtual char_type
const *get(int domain_id
,char_type
const *context
,char_type
const *single_id
,int n
) const
509 pair_type ptr
= get_string(domain_id
,context
,single_id
);
513 if(plural_forms_
.at(domain_id
))
514 form
= (*plural_forms_
[domain_id
])(n
);
516 form
= n
== 1 ? 0 : 1; // Fallback to english plural form
518 CharType
const *p
=ptr
.first
;
519 for(int i
=0;p
< ptr
.second
&& i
<form
;i
++) {
520 p
=std::find(p
,ptr
.second
,0);
530 virtual int domain(std::string
const &domain
) const
532 domains_map_type::const_iterator p
=domains_
.find(domain
);
533 if(p
==domains_
.end())
538 mo_message(messages_info
const &inf
)
540 std::string language
= inf
.language
;
541 std::string variant
= inf
.variant
;
542 std::string country
= inf
.country
;
543 std::string encoding
= inf
.encoding
;
544 std::string lc_cat
= inf
.locale_category
;
545 std::vector
<messages_info::domain
> const &domains
= inf
.domains
;
546 std::vector
<std::string
> const &search_paths
= inf
.paths
;
549 // List of fallbacks: en_US@euro, en@euro, en_US, en.
551 std::vector
<std::string
> paths
;
554 if(!variant
.empty() && !country
.empty())
555 paths
.push_back(language
+ "_" + country
+ "@" + variant
);
558 paths
.push_back(language
+ "@" + variant
);
561 paths
.push_back(language
+ "_" + country
);
563 paths
.push_back(language
);
565 catalogs_
.resize(domains
.size());
566 mo_catalogs_
.resize(domains
.size());
567 plural_forms_
.resize(domains
.size());
570 for(unsigned id
=0;id
<domains
.size();id
++) {
571 std::string domain
=domains
[id
].name
;
572 std::string key_encoding
= domains
[id
].encoding
;
577 for(unsigned j
=0;!found
&& j
<paths
.size();j
++) {
578 for(unsigned i
=0;!found
&& i
<search_paths
.size();i
++) {
579 std::string full_path
= search_paths
[i
]+"/"+paths
[j
]+"/" + lc_cat
+ "/"+domain
+".mo";
580 found
= load_file(full_path
,encoding
,key_encoding
,id
,inf
.callback
);
586 char_type
const *convert(char_type
const *msg
,string_type
&buffer
) const
588 return runtime_conversion
<char_type
>(msg
,buffer
,key_conversion_required_
,locale_encoding_
,key_encoding_
);
// Virtual destructor of the catalog.
// NOTE(review): the body is not visible in this section.
591 virtual ~mo_message()
596 int compare_encodings(std::string
const &left
,std::string
const &right
)
598 return convert_encoding_name(left
).compare(convert_encoding_name(right
));
/// Normalizes an encoding name for comparison: ASCII upper case letters are
/// folded to lower case, ASCII lower case letters and digits are kept, and
/// every other character is dropped ("UTF-8" becomes "utf8").
std::string convert_encoding_name(std::string const &in)
{
    std::string normalized;
    for(std::string::const_iterator it = in.begin(); it != in.end(); ++it) {
        char const c = *it;
        bool const is_upper = 'A' <= c && c <= 'Z';
        bool const is_lower = 'a' <= c && c <= 'z';
        bool const is_digit = '0' <= c && c <= '9';
        if(is_upper)
            normalized += static_cast<char>(c - 'A' + 'a');
        else if(is_lower || is_digit)
            normalized += c;
    }
    return normalized;
}
618 bool load_file( std::string
const &file_name
,
619 std::string
const &locale_encoding
,
620 std::string
const &key_encoding
,
622 messages_info::callback_type
const &callback
)
624 locale_encoding_
= locale_encoding
;
625 key_encoding_
= key_encoding
;
627 key_conversion_required_
= sizeof(CharType
) == 1
628 && compare_encodings(locale_encoding
,key_encoding
)!=0;
630 std::auto_ptr
<mo_file
> mo
;
633 std::vector
<char> vfile
= callback(file_name
,locale_encoding
);
636 mo
.reset(new mo_file(vfile
));
640 the_file
.open(file_name
,locale_encoding
);
643 mo
.reset(new mo_file(the_file
.file
));
646 std::string plural
= extract(mo
->value(0).first
,"plural=","\r\n;");
648 std::string mo_encoding
= extract(mo
->value(0).first
,"charset="," \r\n;");
650 if(mo_encoding
.empty())
651 throw std::runtime_error("Invalid mo-format, encoding is not specified");
653 if(!plural
.empty()) {
654 std::auto_ptr
<lambda::plural
> ptr
=lambda::compile(plural
.c_str());
655 plural_forms_
[id
] = ptr
;
658 if( mo_useable_directly(mo_encoding
,*mo
) )
663 converter
<CharType
> cvt_value(locale_encoding
,mo_encoding
);
664 converter
<CharType
> cvt_key(key_encoding
,mo_encoding
);
665 for(unsigned i
=0;i
<mo
->size();i
++) {
666 char const *ckey
= mo
->key(i
);
667 string_type skey
= cvt_key(ckey
,ckey
+strlen(ckey
));
670 mo_file::pair_type tmp
= mo
->value(i
);
671 string_type value
= cvt_value(tmp
.first
,tmp
.second
);
672 catalogs_
[id
][key
].swap(value
);
679 // Check if the mo file as-is is useful
680 // 1. It is char and not wide character
681 // 2. The locale encoding and mo encoding is same
682 // 3. The source strings encoding and mo encoding is same or all
683 // mo key strings are US-ASCII
684 bool mo_useable_directly( std::string
const &mo_encoding
,
687 if(sizeof(CharType
) != 1)
691 if(compare_encodings(mo_encoding
.c_str(),locale_encoding_
.c_str())!=0)
693 if(compare_encodings(mo_encoding
.c_str(),key_encoding_
.c_str())==0) {
696 for(unsigned i
=0;i
<mo
.size();i
++) {
697 if(!details::is_us_ascii_string(mo
.key(i
))) {
/// Extracts the value that follows \a key inside the header text \a meta.
///
/// \param meta      header of the catalog
/// \param key       marker including its '=' sign, for example "charset="
/// \param separator set of characters, any of which terminates the value
/// \return the text between \a key and the first separator (or the end of
///         \a meta); an empty string when \a key does not occur
static std::string extract(std::string const &meta,std::string const &key,char const *separator)
{
    std::string::size_type const key_at = meta.find(key);
    if(key_at == std::string::npos)
        return std::string();

    std::string::size_type const value_begin = key_at + key.size();
    std::string::size_type const value_end = meta.find_first_of(separator,value_begin);
    // When no separator follows, value_end is npos and substr takes the rest
    return meta.substr(value_begin,value_end - value_begin);
}
719 pair_type
get_string(int domain_id
,char_type
const *context
,char_type
const *in_id
) const
721 pair_type
null_pair((CharType
const *)0,(CharType
const *)0);
722 if(domain_id
< 0 || size_t(domain_id
) >= catalogs_
.size())
724 if(mo_file_use_traits
<char_type
>::in_use
&& mo_catalogs_
[domain_id
]) {
725 return mo_file_use_traits
<char_type
>::use(*mo_catalogs_
[domain_id
],context
,in_id
);
728 key_type
key(context
,in_id
);
729 catalog_type
const &cat
= catalogs_
[domain_id
];
730 typename
catalog_type::const_iterator p
= cat
.find(key
);
734 return pair_type(p
->second
.data(),p
->second
.data()+p
->second
.size());
738 catalogs_set_type catalogs_
;
739 std::vector
<boost::shared_ptr
<mo_file
> > mo_catalogs_
;
740 std::vector
<boost::shared_ptr
<lambda::plural
> > plural_forms_
;
741 domains_map_type domains_
;
743 std::string locale_encoding_
;
744 std::string key_encoding_
;
745 bool key_conversion_required_
;
// Creates the message facet for char: returns a mo_message<char> allocated
// with new and constructed from info.
// NOTE(review): the raw pointer is presumably owned by the caller - confirm.
749 message_format
<char> *create_messages_facet(messages_info
const &info
)
751 return new mo_message
<char>(info
);
// Creates the message facet for wchar_t: returns a mo_message<wchar_t>
// allocated with new and constructed from info.
// NOTE(review): the raw pointer is presumably owned by the caller - confirm.
755 message_format
<wchar_t> *create_messages_facet(messages_info
const &info
)
757 return new mo_message
<wchar_t>(info
);
760 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
// Creates the message facet for char16_t (only compiled when
// BOOST_LOCALE_ENABLE_CHAR16_T is defined): returns a mo_message<char16_t>
// allocated with new and constructed from info.
// NOTE(review): the raw pointer is presumably owned by the caller - confirm.
763 message_format
<char16_t
> *create_messages_facet(messages_info
const &info
)
765 return new mo_message
<char16_t
>(info
);
769 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
// Creates the message facet for char32_t (only compiled when
// BOOST_LOCALE_ENABLE_CHAR32_T is defined): returns a mo_message<char32_t>
// allocated with new and constructed from info.
// NOTE(review): the raw pointer is presumably owned by the caller - confirm.
772 message_format
<char32_t
> *create_messages_facet(messages_info
const &info
)
774 return new mo_message
<char32_t
>(info
);
783 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4