]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/locale/src/icu/collator.cpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / boost / libs / locale / src / icu / collator.cpp
1 //
2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #define BOOST_LOCALE_SOURCE
9 #include <boost/locale/collator.hpp>
10 #include <boost/locale/generator.hpp>
11 #include <boost/thread.hpp>
12 #include <vector>
13 #include <limits>
14
15 #include "cdata.hpp"
16 #include "all_generator.hpp"
17 #include "uconv.hpp"
18 #include "../shared/mo_hash.hpp"
19
20 #include <unicode/coll.h>
21 #if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 402
22 # include <unicode/stringpiece.h>
23 #endif
24
25 namespace boost {
26 namespace locale {
27 namespace impl_icu {
28 template<typename CharType>
29 class collate_impl : public collator<CharType>
30 {
31 public:
32 typedef typename collator<CharType>::level_type level_type;
33 level_type limit(level_type level) const
34 {
35 if(level < 0)
36 level=collator_base::primary;
37 else if(level >= level_count)
38 level = static_cast<level_type>(level_count - 1);
39 return level;
40 }
41
42 #if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 402
43 int do_utf8_compare( level_type level,
44 char const *b1,char const *e1,
45 char const *b2,char const *e2,
46 UErrorCode &status) const
47 {
48 icu::StringPiece left (b1,e1-b1);
49 icu::StringPiece right(b2,e2-b2);
50 return get_collator(level)->compareUTF8(left,right,status);
51
52 }
53 #endif
54
55 int do_ustring_compare( level_type level,
56 CharType const *b1,CharType const *e1,
57 CharType const *b2,CharType const *e2,
58 UErrorCode &status) const
59 {
60 icu::UnicodeString left=cvt_.icu(b1,e1);
61 icu::UnicodeString right=cvt_.icu(b2,e2);
62 return get_collator(level)->compare(left,right,status);
63 }
64
65 int do_real_compare(level_type level,
66 CharType const *b1,CharType const *e1,
67 CharType const *b2,CharType const *e2,
68 UErrorCode &status) const
69 {
70 return do_ustring_compare(level,b1,e1,b2,e2,status);
71 }
72
73 virtual int do_compare( level_type level,
74 CharType const *b1,CharType const *e1,
75 CharType const *b2,CharType const *e2) const
76 {
77 UErrorCode status=U_ZERO_ERROR;
78
79 int res = do_real_compare(level,b1,e1,b2,e2,status);
80
81 if(U_FAILURE(status))
82 throw std::runtime_error(std::string("Collation failed:") + u_errorName(status));
83 if(res < 0)
84 return -1;
85 else if(res > 0)
86 return 1;
87 return 0;
88 }
89
90 std::vector<uint8_t> do_basic_transform(level_type level,CharType const *b,CharType const *e) const
91 {
92 icu::UnicodeString str=cvt_.icu(b,e);
93 std::vector<uint8_t> tmp;
94 tmp.resize(str.length());
95 icu::Collator *collate = get_collator(level);
96 int len = collate->getSortKey(str,&tmp[0],tmp.size());
97 if(len > int(tmp.size())) {
98 tmp.resize(len);
99 collate->getSortKey(str,&tmp[0],tmp.size());
100 }
101 else
102 tmp.resize(len);
103 return tmp;
104 }
105 std::basic_string<CharType> do_transform(level_type level,CharType const *b,CharType const *e) const
106 {
107 std::vector<uint8_t> tmp = do_basic_transform(level,b,e);
108 return std::basic_string<CharType>(tmp.begin(),tmp.end());
109 }
110
111 long do_hash(level_type level,CharType const *b,CharType const *e) const
112 {
113 std::vector<uint8_t> tmp = do_basic_transform(level,b,e);
114 tmp.push_back(0);
115 return gnu_gettext::pj_winberger_hash_function(reinterpret_cast<char *>(&tmp.front()));
116 }
117
118 collate_impl(cdata const &d) :
119 cvt_(d.encoding),
120 locale_(d.locale),
121 is_utf8_(d.utf8)
122 {
123
124 }
125 icu::Collator *get_collator(level_type ilevel) const
126 {
127 int l = limit(ilevel);
128 static const icu::Collator::ECollationStrength levels[level_count] =
129 {
130 icu::Collator::PRIMARY,
131 icu::Collator::SECONDARY,
132 icu::Collator::TERTIARY,
133 icu::Collator::QUATERNARY,
134 icu::Collator::IDENTICAL
135 };
136
137 icu::Collator *col = collates_[l].get();
138 if(col)
139 return col;
140
141 UErrorCode status=U_ZERO_ERROR;
142
143 collates_[l].reset(icu::Collator::createInstance(locale_,status));
144
145 if(U_FAILURE(status))
146 throw std::runtime_error(std::string("Creation of collate failed:") + u_errorName(status));
147
148 collates_[l]->setStrength(levels[l]);
149 return collates_[l].get();
150 }
151
152 private:
153 static const int level_count = 5;
154 icu_std_converter<CharType> cvt_;
155 icu::Locale locale_;
156 mutable boost::thread_specific_ptr<icu::Collator> collates_[level_count];
157 bool is_utf8_;
158 };
159
160
161 #if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 402
162 template<>
163 int collate_impl<char>::do_real_compare(
164 level_type level,
165 char const *b1,char const *e1,
166 char const *b2,char const *e2,
167 UErrorCode &status) const
168 {
169 if(is_utf8_)
170 return do_utf8_compare(level,b1,e1,b2,e2,status);
171 else
172 return do_ustring_compare(level,b1,e1,b2,e2,status);
173 }
174 #endif
175
176 std::locale create_collate(std::locale const &in,cdata const &cd,character_facet_type type)
177 {
178 switch(type) {
179 case char_facet:
180 return std::locale(in,new collate_impl<char>(cd));
181 case wchar_t_facet:
182 return std::locale(in,new collate_impl<wchar_t>(cd));
183 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
184 case char16_t_facet:
185 return std::locale(in,new collate_impl<char16_t>(cd));
186 #endif
187 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
188 case char32_t_facet:
189 return std::locale(in,new collate_impl<char32_t>(cd));
190 #endif
191 default:
192 return in;
193 }
194 }
195
196 } /// impl_icu
197
198 } // locale
199 } // boost
200
201 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4