]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * Copyright Andrey Semashev 2007 - 2015. | |
3 | * Distributed under the Boost Software License, Version 1.0. | |
4 | * (See accompanying file LICENSE_1_0.txt or copy at | |
5 | * http://www.boost.org/LICENSE_1_0.txt) | |
6 | */ | |
7 | /*! | |
8 | * \file code_conversion.cpp | |
9 | * \author Andrey Semashev | |
10 | * \date 08.11.2008 | |
11 | * | |
12 | * \brief This file is part of the Boost.Log library implementation, see the library documentation | |
13 | * at http://www.boost.org/doc/libs/release/libs/log/doc/html/index.html. | |
14 | */ | |
15 | ||
16 | #include <boost/log/detail/config.hpp> | |
17 | #include <cstddef> | |
18 | #include <locale> | |
19 | #include <string> | |
20 | #include <stdexcept> | |
21 | #include <algorithm> | |
22 | #include <boost/log/exceptions.hpp> | |
23 | #include <boost/log/detail/code_conversion.hpp> | |
24 | #if defined(BOOST_WINDOWS) | |
25 | #include <cstring> | |
26 | #include <limits> | |
b32b8144 FG |
27 | #include <boost/winapi/get_last_error.hpp> |
28 | #include <boost/winapi/character_code_conversion.hpp> | |
7c673cae FG |
29 | #endif |
30 | #include <boost/log/detail/header.hpp> | |
31 | ||
32 | namespace boost { | |
33 | ||
34 | BOOST_LOG_OPEN_NAMESPACE | |
35 | ||
36 | namespace aux { | |
37 | ||
38 | BOOST_LOG_ANONYMOUS_NAMESPACE { | |
39 | ||
//! Decodes narrow characters into \c LocalCharT characters by calling \c in() on the given facet.
//! On return \a pSrcBegin and \a pDstBegin hold the "next" positions reported by the facet.
template< typename LocalCharT >
inline std::codecvt_base::result convert(
    std::codecvt< LocalCharT, char, std::mbstate_t > const& fac,
    std::mbstate_t& state,
    const char*& pSrcBegin,
    const char* pSrcEnd,
    LocalCharT*& pDstBegin,
    LocalCharT* pDstEnd)
{
    // Separate cursors receive the positions reported by the facet
    const char* src_next = pSrcBegin;
    LocalCharT* dst_next = pDstBegin;

    const std::codecvt_base::result status =
        fac.in(state, pSrcBegin, pSrcEnd, src_next, pDstBegin, pDstEnd, dst_next);

    // Publish the progress back to the caller
    pSrcBegin = src_next;
    pDstBegin = dst_next;
    return status;
}
52 | ||
//! Encodes \c LocalCharT characters into narrow characters by calling \c out() on the given facet.
//! On return \a pSrcBegin and \a pDstBegin hold the "next" positions reported by the facet.
template< typename LocalCharT >
inline std::codecvt_base::result convert(
    std::codecvt< LocalCharT, char, std::mbstate_t > const& fac,
    std::mbstate_t& state,
    const LocalCharT*& pSrcBegin,
    const LocalCharT* pSrcEnd,
    char*& pDstBegin,
    char* pDstEnd)
{
    // Separate cursors receive the positions reported by the facet
    const LocalCharT* src_next = pSrcBegin;
    char* dst_next = pDstBegin;

    const std::codecvt_base::result status =
        fac.out(state, pSrcBegin, pSrcEnd, src_next, pDstBegin, pDstEnd, dst_next);

    // Publish the progress back to the caller
    pSrcBegin = src_next;
    pDstBegin = dst_next;
    return status;
}
65 | ||
66 | } // namespace | |
67 | ||
68 | template< typename SourceCharT, typename TargetCharT, typename FacetT > | |
69 | inline std::size_t code_convert(const SourceCharT* begin, const SourceCharT* end, std::basic_string< TargetCharT >& converted, std::size_t max_size, FacetT const& fac) | |
70 | { | |
71 | typedef typename FacetT::state_type state_type; | |
72 | TargetCharT converted_buffer[256]; | |
73 | ||
74 | const SourceCharT* const original_begin = begin; | |
75 | state_type state = state_type(); | |
b32b8144 | 76 | std::size_t buf_size = (std::min)(max_size, sizeof(converted_buffer) / sizeof(*converted_buffer)); |
7c673cae FG |
77 | while (begin != end && buf_size > 0u) |
78 | { | |
79 | TargetCharT* dest = converted_buffer; | |
80 | std::codecvt_base::result res = convert( | |
81 | fac, | |
82 | state, | |
83 | begin, | |
84 | end, | |
85 | dest, | |
86 | dest + buf_size); | |
87 | ||
88 | switch (res) | |
89 | { | |
90 | case std::codecvt_base::ok: | |
91 | // All characters were successfully converted | |
92 | // NOTE: MSVC 11 also returns ok when the source buffer was only partially consumed, so we also check that the begin pointer has reached the end. | |
93 | converted.append(converted_buffer, dest); | |
94 | max_size -= dest - converted_buffer; | |
95 | break; | |
96 | ||
97 | case std::codecvt_base::noconv: | |
98 | { | |
99 | // Not possible, unless both character types are actually equivalent | |
b32b8144 | 100 | const std::size_t size = (std::min)(max_size, static_cast< std::size_t >(end - begin)); |
7c673cae FG |
101 | converted.append(begin, begin + size); |
102 | begin += size; | |
103 | max_size -= size; | |
104 | } | |
105 | goto done; | |
106 | ||
107 | case std::codecvt_base::partial: | |
108 | // Some characters were converted, some were not | |
109 | if (dest != converted_buffer) | |
110 | { | |
111 | // Some conversion took place, so it seems like | |
112 | // the destination buffer might not have been long enough | |
113 | converted.append(converted_buffer, dest); | |
114 | max_size -= dest - converted_buffer; | |
115 | ||
116 | // ...and go on for the next part | |
117 | break; | |
118 | } | |
119 | else | |
120 | { | |
121 | // Nothing was converted | |
122 | if (begin == end) | |
123 | goto done; | |
124 | ||
125 | // Looks like the tail of the source buffer contains only part of the last character. | |
126 | // In this case we intentionally fall through to throw an exception. | |
127 | } | |
128 | ||
129 | default: // std::codecvt_base::error | |
130 | BOOST_LOG_THROW_DESCR(conversion_error, "Could not convert character encoding"); | |
131 | } | |
132 | ||
b32b8144 | 133 | buf_size = (std::min)(max_size, sizeof(converted_buffer) / sizeof(*converted_buffer)); |
7c673cae FG |
134 | } |
135 | ||
136 | done: | |
137 | return static_cast< std::size_t >(begin - original_begin); | |
138 | } | |
139 | ||
140 | //! The function converts one string to the character type of another | |
141 | BOOST_LOG_API bool code_convert_impl(const wchar_t* str1, std::size_t len, std::string& str2, std::size_t max_size, std::locale const& loc) | |
142 | { | |
143 | return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< wchar_t, char, std::mbstate_t > >(loc)) == len; | |
144 | } | |
145 | ||
146 | //! The function converts one string to the character type of another | |
147 | BOOST_LOG_API bool code_convert_impl(const char* str1, std::size_t len, std::wstring& str2, std::size_t max_size, std::locale const& loc) | |
148 | { | |
149 | return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< wchar_t, char, std::mbstate_t > >(loc)) == len; | |
150 | } | |
151 | ||
152 | #if !defined(BOOST_LOG_NO_CXX11_CODECVT_FACETS) | |
153 | ||
154 | #if !defined(BOOST_NO_CXX11_CHAR16_T) | |
155 | ||
156 | //! The function converts one string to the character type of another | |
157 | BOOST_LOG_API bool code_convert_impl(const char16_t* str1, std::size_t len, std::string& str2, std::size_t max_size, std::locale const& loc) | |
158 | { | |
159 | return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc)) == len; | |
160 | } | |
161 | ||
162 | //! The function converts one string to the character type of another | |
163 | BOOST_LOG_API bool code_convert_impl(const char* str1, std::size_t len, std::u16string& str2, std::size_t max_size, std::locale const& loc) | |
164 | { | |
165 | return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc)) == len; | |
166 | } | |
167 | ||
168 | //! The function converts one string to the character type of another | |
169 | BOOST_LOG_API bool code_convert_impl(const char16_t* str1, std::size_t len, std::wstring& str2, std::size_t max_size, std::locale const& loc) | |
170 | { | |
171 | std::string temp_str; | |
172 | code_convert(str1, str1 + len, temp_str, temp_str.max_size(), std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc)); | |
173 | const std::size_t temp_size = temp_str.size(); | |
174 | return code_convert(temp_str.c_str(), temp_str.c_str() + temp_size, str2, max_size, std::use_facet< std::codecvt< wchar_t, char, std::mbstate_t > >(loc)) == temp_size; | |
175 | } | |
176 | ||
177 | #endif | |
178 | ||
179 | #if !defined(BOOST_NO_CXX11_CHAR32_T) | |
180 | ||
181 | //! The function converts one string to the character type of another | |
182 | BOOST_LOG_API bool code_convert_impl(const char32_t* str1, std::size_t len, std::string& str2, std::size_t max_size, std::locale const& loc) | |
183 | { | |
184 | return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc)) == len; | |
185 | } | |
186 | ||
187 | //! The function converts one string to the character type of another | |
188 | BOOST_LOG_API bool code_convert_impl(const char* str1, std::size_t len, std::u32string& str2, std::size_t max_size, std::locale const& loc) | |
189 | { | |
190 | return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc)) == len; | |
191 | } | |
192 | ||
193 | //! The function converts one string to the character type of another | |
194 | BOOST_LOG_API bool code_convert_impl(const char32_t* str1, std::size_t len, std::wstring& str2, std::size_t max_size, std::locale const& loc) | |
195 | { | |
196 | std::string temp_str; | |
197 | code_convert(str1, str1 + len, temp_str, temp_str.max_size(), std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc)); | |
198 | const std::size_t temp_size = temp_str.size(); | |
199 | return code_convert(temp_str.c_str(), temp_str.c_str() + temp_size, str2, max_size, std::use_facet< std::codecvt< wchar_t, char, std::mbstate_t > >(loc)) == temp_size; | |
200 | } | |
201 | ||
202 | #endif | |
203 | ||
204 | #if !defined(BOOST_NO_CXX11_CHAR16_T) && !defined(BOOST_NO_CXX11_CHAR32_T) | |
205 | ||
206 | //! The function converts one string to the character type of another | |
207 | BOOST_LOG_API bool code_convert_impl(const char16_t* str1, std::size_t len, std::u32string& str2, std::size_t max_size, std::locale const& loc) | |
208 | { | |
209 | std::string temp_str; | |
210 | code_convert(str1, str1 + len, temp_str, temp_str.max_size(), std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc)); | |
211 | const std::size_t temp_size = temp_str.size(); | |
212 | return code_convert(temp_str.c_str(), temp_str.c_str() + temp_size, str2, max_size, std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc)) == temp_size; | |
213 | } | |
214 | ||
215 | //! The function converts one string to the character type of another | |
216 | BOOST_LOG_API bool code_convert_impl(const char32_t* str1, std::size_t len, std::u16string& str2, std::size_t max_size, std::locale const& loc) | |
217 | { | |
218 | std::string temp_str; | |
219 | code_convert(str1, str1 + len, temp_str, temp_str.max_size(), std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc)); | |
220 | const std::size_t temp_size = temp_str.size(); | |
221 | return code_convert(temp_str.c_str(), temp_str.c_str() + temp_size, str2, max_size, std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc)) == temp_size; | |
222 | } | |
223 | ||
224 | #endif | |
225 | ||
226 | #endif // !defined(BOOST_LOG_NO_CXX11_CODECVT_FACETS) | |
227 | ||
228 | #if defined(BOOST_WINDOWS) | |
229 | ||
230 | //! Converts UTF-8 to UTF-16 | |
231 | std::wstring utf8_to_utf16(const char* str) | |
232 | { | |
233 | std::size_t utf8_len = std::strlen(str); | |
234 | if (utf8_len == 0) | |
235 | return std::wstring(); | |
236 | else if (BOOST_UNLIKELY(utf8_len > static_cast< std::size_t >((std::numeric_limits< int >::max)()))) | |
237 | BOOST_LOG_THROW_DESCR(bad_alloc, "UTF-8 string too long"); | |
238 | ||
b32b8144 | 239 | int len = boost::winapi::MultiByteToWideChar(boost::winapi::CP_UTF8_, boost::winapi::MB_ERR_INVALID_CHARS_, str, static_cast< int >(utf8_len), NULL, 0); |
7c673cae FG |
240 | if (BOOST_LIKELY(len > 0)) |
241 | { | |
242 | std::wstring wstr; | |
243 | wstr.resize(len); | |
244 | ||
b32b8144 | 245 | len = boost::winapi::MultiByteToWideChar(boost::winapi::CP_UTF8_, boost::winapi::MB_ERR_INVALID_CHARS_, str, static_cast< int >(utf8_len), &wstr[0], len); |
7c673cae FG |
246 | if (BOOST_LIKELY(len > 0)) |
247 | { | |
248 | return wstr; | |
249 | } | |
250 | } | |
251 | ||
b32b8144 | 252 | const boost::winapi::DWORD_ err = boost::winapi::GetLastError(); |
7c673cae FG |
253 | BOOST_LOG_THROW_DESCR_PARAMS(system_error, "Failed to convert UTF-8 to UTF-16", (err)); |
254 | BOOST_LOG_UNREACHABLE_RETURN(std::wstring()); | |
255 | } | |
256 | ||
257 | //! Converts UTF-16 to UTF-8 | |
258 | std::string utf16_to_utf8(const wchar_t* wstr) | |
259 | { | |
260 | std::size_t utf16_len = std::wcslen(wstr); | |
261 | if (utf16_len == 0) | |
262 | return std::string(); | |
263 | else if (BOOST_UNLIKELY(utf16_len > static_cast< std::size_t >((std::numeric_limits< int >::max)()))) | |
264 | BOOST_LOG_THROW_DESCR(bad_alloc, "UTF-16 string too long"); | |
265 | ||
b32b8144 | 266 | const boost::winapi::DWORD_ flags = |
7c673cae | 267 | #if BOOST_USE_WINAPI_VERSION >= BOOST_WINAPI_VERSION_WIN6 |
b32b8144 | 268 | boost::winapi::WC_ERR_INVALID_CHARS_; |
7c673cae FG |
269 | #else |
270 | 0u; | |
271 | #endif | |
b32b8144 | 272 | int len = boost::winapi::WideCharToMultiByte(boost::winapi::CP_UTF8_, flags, wstr, static_cast< int >(utf16_len), NULL, 0, NULL, NULL); |
7c673cae FG |
273 | if (BOOST_LIKELY(len > 0)) |
274 | { | |
275 | std::string str; | |
276 | str.resize(len); | |
277 | ||
b32b8144 | 278 | len = boost::winapi::WideCharToMultiByte(boost::winapi::CP_UTF8_, flags, wstr, static_cast< int >(utf16_len), &str[0], len, NULL, NULL); |
7c673cae FG |
279 | if (BOOST_LIKELY(len > 0)) |
280 | { | |
281 | return str; | |
282 | } | |
283 | } | |
284 | ||
b32b8144 | 285 | const boost::winapi::DWORD_ err = boost::winapi::GetLastError(); |
7c673cae FG |
286 | BOOST_LOG_THROW_DESCR_PARAMS(system_error, "Failed to convert UTF-16 to UTF-8", (err)); |
287 | BOOST_LOG_UNREACHABLE_RETURN(std::string()); | |
288 | } | |
289 | ||
290 | #endif // defined(BOOST_WINDOWS) | |
291 | ||
292 | } // namespace aux | |
293 | ||
294 | BOOST_LOG_CLOSE_NAMESPACE // namespace log | |
295 | ||
296 | } // namespace boost | |
297 | ||
298 | #include <boost/log/detail/footer.hpp> |