]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*============================================================================= |
2 | Copyright (c) 2001-2011 Joel de Guzman | |
3 | ||
4 | Distributed under the Boost Software License, Version 1.0. (See accompanying | |
5 | file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
6 | ==============================================================================*/ | |
7 | #if !defined(BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM) | |
8 | #define BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM | |
9 | ||
10 | #if defined(_MSC_VER) | |
11 | #pragma once | |
12 | #endif | |
13 | ||
14 | #include <boost/cstdint.hpp> | |
7c673cae FG |
15 | #include <boost/regex/pending/unicode_iterator.hpp> |
16 | #include <boost/type_traits/make_unsigned.hpp> | |
1e59de90 | 17 | #include <iterator> |
7c673cae FG |
18 | #include <string> |
19 | ||
20 | namespace boost { namespace spirit | |
21 | { | |
22 | typedef ::boost::uint32_t ucs4_char; | |
23 | typedef char utf8_char; | |
24 | typedef std::basic_string<ucs4_char> ucs4_string; | |
25 | typedef std::basic_string<utf8_char> utf8_string; | |
26 | ||
27 | template <typename Char> | |
28 | inline utf8_string to_utf8(Char value) | |
29 | { | |
30 | // always store as UTF8 | |
31 | utf8_string result; | |
32 | typedef std::back_insert_iterator<utf8_string> insert_iter; | |
33 | insert_iter out_iter(result); | |
34 | utf8_output_iterator<insert_iter> utf8_iter(out_iter); | |
35 | typedef typename make_unsigned<Char>::type UChar; | |
36 | *utf8_iter = (UChar)value; | |
37 | return result; | |
38 | } | |
39 | ||
40 | template <typename Char> | |
41 | inline utf8_string to_utf8(Char const* str) | |
42 | { | |
43 | // always store as UTF8 | |
44 | utf8_string result; | |
45 | typedef std::back_insert_iterator<utf8_string> insert_iter; | |
46 | insert_iter out_iter(result); | |
47 | utf8_output_iterator<insert_iter> utf8_iter(out_iter); | |
48 | typedef typename make_unsigned<Char>::type UChar; | |
49 | while (*str) | |
50 | *utf8_iter++ = (UChar)*str++; | |
51 | return result; | |
52 | } | |
53 | ||
54 | template <typename Char, typename Traits, typename Allocator> | |
55 | inline utf8_string | |
56 | to_utf8(std::basic_string<Char, Traits, Allocator> const& str) | |
57 | { | |
58 | // always store as UTF8 | |
59 | utf8_string result; | |
60 | typedef std::back_insert_iterator<utf8_string> insert_iter; | |
61 | insert_iter out_iter(result); | |
62 | utf8_output_iterator<insert_iter> utf8_iter(out_iter); | |
63 | typedef typename make_unsigned<Char>::type UChar; | |
1e59de90 TL |
64 | for (Char const* ptr = str.data(), |
65 | * end = ptr + str.size(); ptr < end; ++ptr) | |
7c673cae | 66 | { |
1e59de90 | 67 | *utf8_iter++ = (UChar)*ptr; |
7c673cae FG |
68 | } |
69 | return result; | |
70 | } | |
92f5a8d4 | 71 | |
f67539c2 TL |
72 | // Assume wchar_t content is UTF-16 on MSVC, or mingw/wineg++ with -fshort-wchar |
73 | #if defined(_MSC_VER) || defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 2 | |
92f5a8d4 TL |
74 | inline utf8_string to_utf8(wchar_t value) |
75 | { | |
76 | utf8_string result; | |
77 | typedef std::back_insert_iterator<utf8_string> insert_iter; | |
78 | insert_iter out_iter(result); | |
79 | utf8_output_iterator<insert_iter> utf8_iter(out_iter); | |
80 | ||
81 | u16_to_u32_iterator<wchar_t const*, ucs4_char> ucs4_iter(&value); | |
82 | *utf8_iter++ = *ucs4_iter; | |
83 | ||
84 | return result; | |
85 | } | |
86 | ||
87 | inline utf8_string to_utf8(wchar_t const* str) | |
88 | { | |
89 | utf8_string result; | |
90 | typedef std::back_insert_iterator<utf8_string> insert_iter; | |
91 | insert_iter out_iter(result); | |
92 | utf8_output_iterator<insert_iter> utf8_iter(out_iter); | |
93 | ||
94 | u16_to_u32_iterator<wchar_t const*, ucs4_char> ucs4_iter(str); | |
95 | for (ucs4_char c; (c = *ucs4_iter) != ucs4_char(); ++ucs4_iter) { | |
96 | *utf8_iter++ = c; | |
97 | } | |
98 | ||
99 | return result; | |
100 | } | |
101 | ||
102 | template <typename Traits, typename Allocator> | |
103 | inline utf8_string | |
104 | to_utf8(std::basic_string<wchar_t, Traits, Allocator> const& str) | |
105 | { | |
106 | return to_utf8(str.c_str()); | |
107 | } | |
108 | #endif | |
7c673cae FG |
109 | }} |
110 | ||
111 | #endif |