2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
8 #define BOOST_LOCALE_SOURCE
9 #include <boost/locale/collator.hpp>
10 #include <boost/locale/generator.hpp>
11 #include <boost/thread.hpp>
16 #include "all_generator.hpp"
18 #include "../shared/mo_hash.hpp"
20 #include <unicode/coll.h>
21 #if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 402
22 # include <unicode/stringpiece.h>
// Collation facet implemented on top of icu::Collator. It derives from
// collator<CharType>; the members that follow in this class supply the
// comparison, sort-key transformation and hashing operations.
28 template<typename CharType
>
29 class collate_impl
: public collator
<CharType
>
// Short local name for the collation-level type declared by collator<CharType>.
32 typedef typename collator
<CharType
>::level_type level_type
;
// Normalizes a requested collation level to one this class can serve.
// Two adjustments are visible: one branch replaces the level with
// collator_base::primary, and the other caps any level >= level_count at
// level_count - 1 (the last valid index).
// NOTE(review): the condition guarding the first assignment and the final
// return are not part of this view - presumably `if(level < 0)` and
// `return level;`. Confirm against the full file.
33 level_type
limit(level_type level
) const
36 level
=collator_base::primary
;
// Upper bound: anything past the last supported level maps to the last one.
37 else if(level
>= level_count
)
38 level
= static_cast<level_type
>(level_count
- 1);
42 #if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 402
// Compares the byte ranges [b1,e1) and [b2,e2) with the collator selected
// for `level`, using icu::Collator::compareUTF8 directly on the bytes (no
// conversion to icu::UnicodeString). Only compiled when the ICU version
// check above holds; the same check gates the inclusion of
// <unicode/stringpiece.h> at the top of the file.
//
// level  - requested collation level (passed on to get_collator).
// b1,e1  - begin/end of the left-hand range.
// b2,e2  - begin/end of the right-hand range.
// status - in/out ICU error code, handed to compareUTF8 unchanged.
// Returns whatever compareUTF8 returns.
43 int do_utf8_compare( level_type level
,
44 char const *b1
,char const *e1
,
45 char const *b2
,char const *e2
,
46 UErrorCode
&status
) const
// Describe each range to ICU as (pointer, length).
48 icu::StringPiece
left (b1
,e1
-b1
);
49 icu::StringPiece
right(b2
,e2
-b2
);
50 return get_collator(level
)->compareUTF8(left
,right
,status
);
// Compares [b1,e1) and [b2,e2) by first converting both ranges to
// icu::UnicodeString through cvt_ and then calling icu::Collator::compare
// on the collator selected for `level`.
//
// level  - requested collation level (passed on to get_collator).
// b1,e1  - begin/end of the left-hand CharType range.
// b2,e2  - begin/end of the right-hand CharType range.
// status - in/out ICU error code, handed to compare() unchanged.
// Returns whatever icu::Collator::compare returns.
55 int do_ustring_compare( level_type level
,
56 CharType
const *b1
,CharType
const *e1
,
57 CharType
const *b2
,CharType
const *e2
,
58 UErrorCode
&status
) const
60 icu::UnicodeString left
=cvt_
.icu(b1
,e1
);
61 icu::UnicodeString right
=cvt_
.icu(b2
,e2
);
62 return get_collator(level
)->compare(left
,right
,status
);
// Comparison entry point used by do_compare. This generic definition simply
// forwards every argument to do_ustring_compare. A separate definition of
// collate_impl<char>::do_real_compare appears later in this file, inside an
// ICU-version guard.
65 int do_real_compare(level_type level
,
66 CharType
const *b1
,CharType
const *e1
,
67 CharType
const *b2
,CharType
const *e2
,
68 UErrorCode
&status
) const
70 return do_ustring_compare(level
,b1
,e1
,b2
,e2
,status
);
// Virtual comparison hook: compares [b1,e1) with [b2,e2) at the given level.
// It runs do_real_compare with a fresh ICU status and can throw
// std::runtime_error whose message is "Collation failed:" followed by the
// ICU error name.
// NOTE(review): the condition guarding the throw and the statements that
// turn `res` into the return value are not part of this view - presumably
// the throw is taken when U_FAILURE(status). Confirm against the full file.
73 virtual int do_compare( level_type level
,
74 CharType
const *b1
,CharType
const *e1
,
75 CharType
const *b2
,CharType
const *e2
) const
// ICU reports errors through this in/out code; start from "no error".
77 UErrorCode status
=U_ZERO_ERROR
;
79 int res
= do_real_compare(level
,b1
,e1
,b2
,e2
,status
);
82 throw std::runtime_error(std::string("Collation failed:") + u_errorName(status
));
// Produces the ICU sort key for [b,e) at the given level as a byte vector.
// The range is converted to icu::UnicodeString, a buffer sized to the
// string's length is used as a first guess, and getSortKey is called a
// second time when the reported key length does not fit that buffer.
// NOTE(review): the lines that resize `tmp` to `len` and return it are not
// part of this view - confirm against the full file.
// NOTE(review): when the converted string has length 0, `tmp` is empty and
// `&tmp[0]` indexes an empty vector; verify this case is safe.
90 std::vector
<uint8_t> do_basic_transform(level_type level
,CharType
const *b
,CharType
const *e
) const
92 icu::UnicodeString str
=cvt_
.icu(b
,e
);
93 std::vector
<uint8_t> tmp
;
// Initial buffer guess: one byte per code unit of the converted string.
94 tmp
.resize(str
.length());
95 icu::Collator
*collate
= get_collator(level
);
96 int len
= collate
->getSortKey(str
,&tmp
[0],tmp
.size());
// The key did not fit in the first buffer: generate it again.
97 if(len
> int(tmp
.size())) {
99 collate
->getSortKey(str
,&tmp
[0],tmp
.size());
// Returns the sort key of [b,e) at the given level as a
// std::basic_string<CharType>. The key bytes come from do_basic_transform
// and the string is built from that vector's iterator range, so each key
// byte becomes one CharType element.
105 std::basic_string
<CharType
> do_transform(level_type level
,CharType
const *b
,CharType
const *e
) const
107 std::vector
<uint8_t> tmp
= do_basic_transform(level
,b
,e
);
108 return std::basic_string
<CharType
>(tmp
.begin(),tmp
.end());
// Hashes [b,e) at the given level by computing its sort key with
// do_basic_transform and feeding the key bytes to
// gnu_gettext::pj_winberger_hash_function.
// NOTE(review): the hash function is given only a pointer (no length), so
// it presumably expects a NUL-terminated buffer. The statement between the
// two visible lines is not part of this view - confirm that it terminates
// `tmp` and guarantees it is non-empty before front() is taken.
111 long do_hash(level_type level
,CharType
const *b
,CharType
const *e
) const
113 std::vector
<uint8_t> tmp
= do_basic_transform(level
,b
,e
);
// The key is stored as uint8_t; the hash helper is called with char *.
115 return gnu_gettext::pj_winberger_hash_function(reinterpret_cast<char *>(&tmp
.front()));
// Constructs the facet from the cdata object `d`.
// NOTE(review): the member-initializer list and body are not part of this
// view; presumably they set up cvt_ and locale_ from `d` - confirm.
118 collate_impl(cdata
const &d
) :
// Returns the icu::Collator to use for `ilevel`.
// The level is first normalized with limit() and then used as an index into
// collates_, an array of boost::thread_specific_ptr slots. The visible
// creation path builds a collator for locale_ with
// icu::Collator::createInstance, stores it in the slot, sets its strength
// from the `levels` table and returns it.
// Throws std::runtime_error ("Creation of collate failed:" + ICU error
// name) when createInstance reports a failure status.
// NOTE(review): the statements between fetching `col` and the creation path
// are not part of this view - presumably an early `return col` when the
// slot is already populated. Confirm against the full file.
125 icu::Collator
*get_collator(level_type ilevel
) const
127 int l
= limit(ilevel
);
// Index -> ICU strength; the table has level_count entries.
128 static const icu::Collator::ECollationStrength levels
[level_count
] =
130 icu::Collator::PRIMARY
,
131 icu::Collator::SECONDARY
,
132 icu::Collator::TERTIARY
,
133 icu::Collator::QUATERNARY
,
134 icu::Collator::IDENTICAL
137 icu::Collator
*col
= collates_
[l
].get();
141 UErrorCode status
=U_ZERO_ERROR
;
143 collates_
[l
].reset(icu::Collator::createInstance(locale_
,status
));
// Check the status before the freshly stored pointer is dereferenced below.
145 if(U_FAILURE(status
))
146 throw std::runtime_error(std::string("Creation of collate failed:") + u_errorName(status
));
148 collates_
[l
]->setStrength(levels
[l
]);
149 return collates_
[l
].get();
// Number of collation levels handled; it sizes both the strength table in
// get_collator and the collates_ array, and is the bound used by limit().
153 static const int level_count
= 5;
// Converter used to turn CharType ranges into icu::UnicodeString.
154 icu_std_converter
<CharType
> cvt_
;
// One collator slot per level, held in boost::thread_specific_ptr. Declared
// mutable because the const member get_collator() fills the slots.
156 mutable boost::thread_specific_ptr
<icu::Collator
> collates_
[level_count
];
161 #if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 402
// Definition of do_real_compare for collate_impl<char>, compiled only when
// the ICU version check above holds. Two outcomes are visible: forwarding
// to do_utf8_compare or forwarding to do_ustring_compare, with identical
// arguments.
// NOTE(review): the leading `level` parameter line and the condition that
// selects between the two returns are not part of this view - presumably
// the UTF-8 path is taken when the facet's encoding is UTF-8. Confirm
// against the full file.
163 int collate_impl
<char>::do_real_compare(
165 char const *b1
,char const *e1
,
166 char const *b2
,char const *e2
,
167 UErrorCode
&status
) const
170 return do_utf8_compare(level
,b1
,e1
,b2
,e2
,status
);
172 return do_ustring_compare(level
,b1
,e1
,b2
,e2
,status
);
// Returns a new std::locale built from `in` with a freshly allocated
// collate_impl facet (constructed from `cd`) installed. Visible variants
// install collate_impl<char> and collate_impl<wchar_t>, plus
// collate_impl<char16_t> / collate_impl<char32_t> when
// BOOST_LOCALE_ENABLE_CHAR16_T / BOOST_LOCALE_ENABLE_CHAR32_T are defined.
// NOTE(review): the statements that choose a variant from `type`, and
// whatever is returned for an unmatched `type`, are not part of this view -
// confirm against the full file.
176 std::locale
create_collate(std::locale
const &in
,cdata
const &cd
,character_facet_type type
)
180 return std::locale(in
,new collate_impl
<char>(cd
));
182 return std::locale(in
,new collate_impl
<wchar_t>(cd
));
183 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
185 return std::locale(in
,new collate_impl
<char16_t
>(cd
));
187 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
189 return std::locale(in
,new collate_impl
<char32_t
>(cd
));
201 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4