[ceph.git] / ceph / src / boost / libs / detail / test / test_utf8_codecvt.cpp

/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
// test_utf8_codecvt.cpp

// (C) Copyright 2002-4 Robert Ramey - http://www.rrsd.com . 
// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

#include <algorithm> // std::copy
#include <fstream>
#include <iostream>
#include <iterator>
#include <locale>
#include <string>
#include <vector>

#include <cstddef> // size_t
#include <cstdlib> // EXIT_SUCCESS
#include <cwchar>
#include <boost/config.hpp>

#define BOOST_UTF8_BEGIN_NAMESPACE namespace boost { namespace detail {
#define BOOST_UTF8_END_NAMESPACE } }
#include <boost/detail/utf8_codecvt_facet.hpp>
#include <boost/detail/utf8_codecvt_facet.ipp>

// When Boost.Config reports that the C library names are not available in
// namespace std, pull the global-namespace declarations into std so that the
// std::-qualified uses in this file (std::size_t, std::wcslen, std::wint_t)
// resolve.
#if defined(BOOST_NO_STDC_NAMESPACE)
namespace std{
    using ::size_t;
    using ::wcslen;
    // wint_t is not imported on the platforms excluded below.
#if !defined(UNDER_CE) && !defined(__PGIC__)
    using ::wint_t;
#endif
} // namespace std
#endif

// Note: this workaround is copied from boost/iostreams/char_traits.hpp.
//
// The Dinkumware library that comes with QNX Momentics 6.3.0, 4.0.2 (and
// likewise Sun C++ 5.8 + STLport 4) defines the EOF and WEOF macros using the
// wint_t type without std:: qualification. The using-declaration below makes
// the unqualified name wint_t visible in this scope on those platforms, so
// that code in this file which expands WEOF can find it.
// NOTE: Use BOOST_WORKAROUND?
#if (defined(__QNX__) && defined(BOOST_DINKUMWARE_STDLIB))  \
    || defined(__SUNPRO_CC)
    using ::std::wint_t;
#endif

#include <boost/core/lightweight_test.hpp>

// Reference encodings for the round-trip tests, keyed by the size of wchar_t
// in bytes. utf8_encoding holds a UTF-8 byte sequence and wchar_encoding holds
// the same code points, one per element. Only the 2-byte and 4-byte variants
// are defined.
template<std::size_t WideCharSize>
struct test_data
{
    static unsigned char utf8_encoding[];
    static wchar_t wchar_encoding[];
};

// 2-byte wchar_t: boundary values of the 1-, 2- and 3-byte UTF-8 forms,
// stopping at U+7FFF.
template<>
unsigned char test_data<2>::utf8_encoding[] = {
    0x01,                   // U+0001
    0x7F,                   // U+007F
    0xC2, 0x80,             // U+0080
    0xDF, 0xBF,             // U+07FF
    0xE0, 0xA0, 0x80,       // U+0800
    0xE7, 0xBF, 0xBF        // U+7FFF
};

template<>
wchar_t test_data<2>::wchar_encoding[] = {
    0x0001, 0x007F,
    0x0080, 0x07FF,
    0x0800, 0x7FFF
};

// 4-byte wchar_t: boundary values of the 1- to 4-byte UTF-8 forms, up to
// U+10FFFF.
template<>
unsigned char test_data<4>::utf8_encoding[] = {
    0x01,                   // U+0001
    0x7F,                   // U+007F
    0xC2, 0x80,             // U+0080
    0xDF, 0xBF,             // U+07FF
    0xE0, 0xA0, 0x80,       // U+0800
    0xEF, 0xBF, 0xBF,       // U+FFFF
    0xF0, 0x90, 0x80, 0x80, // U+10000
    0xF4, 0x8F, 0xBF, 0xBF, // U+10FFFF
    /* codecvt implementations for clang and gcc don't handle more than 21 bits
     * and return eof accordingly, so the rest of the 32-bit range is not
     * tested. The disabled entries were:
     *
     * 0xf7, 0xbf, 0xbf, 0xbf,
     * 0xf8, 0x88, 0x80, 0x80, 0x80,
     * 0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
     * 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80,
     * 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf
     */
};

// The casts keep the initializers well-formed when wchar_t cannot represent
// the literal's value without narrowing.
template<>
wchar_t test_data<4>::wchar_encoding[] = {
    static_cast<wchar_t>(0x00000001),
    static_cast<wchar_t>(0x0000007F),
    static_cast<wchar_t>(0x00000080),
    static_cast<wchar_t>(0x000007FF),
    static_cast<wchar_t>(0x00000800),
    static_cast<wchar_t>(0x0000FFFF),
    static_cast<wchar_t>(0x00010000),
    static_cast<wchar_t>(0x0010FFFF),
    /* codecvt implementations for clang and gcc don't handle more than 21 bits
     * and return eof accordingly, so the rest of the 32-bit range is not
     * tested. The disabled entries were:
     *
     * (wchar_t)0x001fffff,
     * (wchar_t)0x00200000,
     * (wchar_t)0x03ffffff,
     * (wchar_t)0x04000000,
     * (wchar_t)0x7fffffff
     */
};

// Exercises boost::detail::utf8_codecvt_facet through file streams and through
// the std::codecvt interface:
//   1. writes the reference UTF-8 bytes to "test.dat";
//   2. reads them back through a wide stream imbued with the facet and
//      compares the result with the reference wide characters;
//   3. writes the decoded characters to "test2.dat" through the facet and
//      checks that both files hold the same bytes;
//   4. round-trips a longer wide string through "test3.dat";
//   5. checks codecvt::length() on the whole byte sequence and on a prefix
//      that stops in the middle of a multi-byte character.
// Both arguments are unused. Always returns EXIT_SUCCESS; failed checks are
// recorded by the BOOST_TEST* macros rather than through the return value.
int
test_main(int /* argc */, char * /* argv */[]) {
    // Wide streams imbued with this locale convert between UTF-8 bytes in the
    // file and wchar_t in memory.
    std::locale utf8_locale
        = std::locale(
            std::locale::classic(),
            new boost::detail::utf8_codecvt_facet
        );

    typedef char utf8_t;
    // define test data compatible with the wchar_t implementation
    // as either ucs-2 or ucs-4 depending on the compiler/library.
    typedef test_data<sizeof(wchar_t)> td;
    typedef std::codecvt<wchar_t, char, std::mbstate_t> codecvt_t;

    const std::size_t utf8_count
        = sizeof(td::utf8_encoding) / sizeof(*td::utf8_encoding);
    const std::size_t wchar_count
        = sizeof(td::wchar_encoding) / sizeof(*td::wchar_encoding);

    // Send our test UTF-8 data to file
    {
        std::ofstream ofs;
        ofs.open("test.dat");
        std::copy(
            td::utf8_encoding,
            td::utf8_encoding + utf8_count,
            std::ostream_iterator<utf8_t>(ofs)
        );
    }

    // Read the test data back in, converting to wide characters on the way in
    std::vector<wchar_t> from_file;
    {
        std::wifstream ifs;
        ifs.imbue(utf8_locale);
        ifs.open("test.dat");

        std::wint_t item = 0;
        // note can't use normal vector from iterator constructor because
        // dinkumware doesn't have it.
        for(;;){
            item = ifs.get();
            if(item == WEOF)
                break;
            from_file.push_back(item);
        }
    }

    // Compare the element count first: the three-iterator std::equal would
    // succeed on an empty from_file (e.g. when the file could not be read)
    // and would read past the end of wchar_encoding if too many characters
    // had been decoded.
    BOOST_TEST_EQ(from_file.size(), wchar_count);
    const bool decoded_matches
        = from_file.size() == wchar_count
        && std::equal(from_file.begin(), from_file.end(), td::wchar_encoding);
    BOOST_TEST(decoded_matches);

    // Send the decoded wide characters back out, converting to UTF-8
    {
        std::wofstream ofs;
        ofs.imbue(utf8_locale);
        ofs.open("test2.dat");
        std::copy(
            from_file.begin(),
            from_file.end(),
            std::ostream_iterator<wchar_t, wchar_t>(ofs)
        );
    }

    // Make sure that both files are the same
    {
        typedef std::istream_iterator<utf8_t> is_iter;
        is_iter end_iter;

        std::ifstream ifs1("test.dat");
        is_iter it1(ifs1);
        std::vector<utf8_t> data1;
        std::copy(it1, end_iter, std::back_inserter(data1));

        std::ifstream ifs2("test2.dat");
        is_iter it2(ifs2);
        std::vector<utf8_t> data2;
        std::copy(it2, end_iter, std::back_inserter(data2));

        BOOST_TEST(data1 == data2);
    }

    // some libraries have trouble that only shows up with longer strings
    
    const wchar_t * test3_data = L"\
    <?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>\
    <!DOCTYPE boost_serialization>\
    <boost_serialization signature=\"serialization::archive\" version=\"3\">\
    <a class_id=\"0\" tracking_level=\"0\">\
        <b>1</b>\
        <f>96953204</f>\
        <g>177129195</g>\
        <l>1</l>\
        <m>5627</m>\
        <n>23010</n>\
        <o>7419</o>\
        <p>16212</p>\
        <q>4086</q>\
        <r>2749</r>\
        <c>-33</c>\
        <s>124</s>\
        <t>28</t>\
        <u>32225</u>\
        <v>17543</v>\
        <w>0.84431422</w>\
        <x>1.0170664757130923</x>\
        <y>tjbx</y>\
        <z>cuwjentqpkejp</z>\
    </a>\
    </boost_serialization>\
    ";
    
    // Write the long string out, converting to UTF-8
    std::size_t l = std::wcslen(test3_data);
    {
        std::wofstream ofs;
        ofs.imbue(utf8_locale);
        ofs.open("test3.dat");
        std::copy(
            test3_data,
            test3_data + l,
            std::ostream_iterator<wchar_t, wchar_t>(ofs)
        );
    }

    // Read the file back and make sure it matches the original string.
    // The whole file is collected first so that a short or over-long file is
    // reported as a mismatch instead of advancing an iterator past
    // end-of-stream or ignoring trailing data.
    {
        std::wifstream ifs;
        ifs.imbue(utf8_locale);
        ifs.open("test3.dat");
        ifs >> std::noskipws;

        std::vector<wchar_t> round_trip;
        std::copy(
            std::istream_iterator<wchar_t, wchar_t>(ifs),
            std::istream_iterator<wchar_t, wchar_t>(),
            std::back_inserter(round_trip)
        );

        BOOST_TEST_EQ(round_trip.size(), l);
        const bool round_trip_matches
            = round_trip.size() == l
            && std::equal(test3_data, test3_data + l, round_trip.begin());
        BOOST_TEST(round_trip_matches);
    }

    // Test length calculation
    {
        codecvt_t const& fac = std::use_facet< codecvt_t >(utf8_locale);
        std::mbstate_t mbs = std::mbstate_t();
        const int utf8_len = static_cast< int >(utf8_count);
        // The last argument is the largest size_t value, i.e. no limit on the
        // number of wide characters.
        int res = fac.length(
            mbs,
            reinterpret_cast< const char* >(td::utf8_encoding),
            reinterpret_cast< const char* >(td::utf8_encoding + utf8_len),
            ~static_cast< std::size_t >(0u)
        );
        BOOST_TEST_EQ(utf8_len, res);
    }

    // Test that length calculation detects character boundaries
    {
        codecvt_t const& fac = std::use_facet< codecvt_t >(utf8_locale);
        std::mbstate_t mbs = std::mbstate_t();
        // The first 5 bytes of utf8_encoding contain 3 complete UTF-8 characters (taking 4 bytes in total) and 1 byte of an incomplete character.
        // This last byte should not be accounted by length().
        const int input_len = 5;
        const int utf8_len = 4;
        int res = fac.length(
            mbs,
            reinterpret_cast< const char* >(td::utf8_encoding),
            reinterpret_cast< const char* >(td::utf8_encoding + input_len),
            ~static_cast< std::size_t >(0u)
        );
        BOOST_TEST_EQ(utf8_len, res);
    }

    return EXIT_SUCCESS;
}

int
main(int argc, char * argv[]){

    int retval = 1;
    BOOST_TRY{
        retval = test_main(argc, argv);
    }
    #ifndef BOOST_NO_EXCEPTION_STD_NAMESPACE
        BOOST_CATCH(const std::exception & e){
            BOOST_ERROR(e.what());
        }
    #endif
    BOOST_CATCH(...){
        BOOST_ERROR("failed with uncaught exception:");
    }
    BOOST_CATCH_END

    int error_count = boost::report_errors();
    if(error_count > 0)
        retval = error_count;
    return retval;
}
Commit	Line	Data
7c673cae FG	1	/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
	2	// test_utf8_codecvt.cpp
	3
	4	// (C) Copyright 2002-4 Robert Ramey - http://www.rrsd.com .
	5	// Use, modification and distribution is subject to the Boost Software
	6	// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
	7	// http://www.boost.org/LICENSE_1_0.txt)
	8
	9	#include <algorithm> // std::copy
	10	#include <fstream>
	11	#include <iostream>
	12	#include <iterator>
	13	#include <locale>
	14	#include <vector>
	15	#include <string>
	16
	17	#include <cstddef> // size_t
	18	#include <cwchar>
	19	#include <boost/config.hpp>
	20
	21	#define BOOST_UTF8_BEGIN_NAMESPACE namespace boost { namespace detail {
	22	#define BOOST_UTF8_END_NAMESPACE } }
	23	#include <boost/detail/utf8_codecvt_facet.hpp>
	24	#include <boost/detail/utf8_codecvt_facet.ipp>
	25
	26	#if defined(BOOST_NO_STDC_NAMESPACE)
	27	namespace std{
	28	using ::size_t;
	29	using ::wcslen;
	30	#if !defined(UNDER_CE) && !defined(__PGIC__)
	31	using ::w_int;
	32	#endif
	33	} // namespace std
	34	#endif
	35
	36	// Note: copied from boost/iostreams/char_traits.hpp
	37	//
	38	// Dinkumware that comes with QNX Momentics 6.3.0, 4.0.2, incorrectly defines
	39	// the EOF and WEOF macros to not std:: qualify the wint_t type (and so does
	40	// Sun C++ 5.8 + STLport 4). Fix by placing the def in this scope.
	41	// NOTE: Use BOOST_WORKAROUND?
	42	#if (defined(__QNX__) && defined(BOOST_DINKUMWARE_STDLIB)) \
	43	|| defined(__SUNPRO_CC)
	44	using ::std::wint_t;
	45	#endif
	46
	47	#include <boost/core/lightweight_test.hpp>
	48
	49	template<std::size_t s>
	50	struct test_data
	51	{
	52	static unsigned char utf8_encoding[];
	53	static wchar_t wchar_encoding[];
	54	};
	55
	56	template<>
	57	unsigned char test_data<2>::utf8_encoding[] = {
	58	0x01,
	59	0x7f,
	60	0xc2, 0x80,
	61	0xdf, 0xbf,
	62	0xe0, 0xa0, 0x80,
	63	0xe7, 0xbf, 0xbf
	64	};
65
66	template<>
67	wchar_t test_data<2>::wchar_encoding[] = {
68	0x0001,
69	0x007f,
70	0x0080,
71	0x07ff,
72	0x0800,
73	0x7fff
74	};
75
76	template<>
77	unsigned char test_data<4>::utf8_encoding[] = {
78	0x01,
79	0x7f,
80	0xc2, 0x80,
81	0xdf, 0xbf,
82	0xe0, 0xa0, 0x80,
83	0xef, 0xbf, 0xbf,
84	0xf0, 0x90, 0x80, 0x80,
85	0xf4, 0x8f, 0xbf, 0xbf,
86	/* codecvt implementations for clang and gcc don't handle more than 21 bits and
87	* return eof accordingly. So don't test the whole 32-bit range
88	*/
89	/*
90	0xf7, 0xbf, 0xbf, 0xbf,
91	0xf8, 0x88, 0x80, 0x80, 0x80,
92	0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
93	0xfc, 0x84, 0x80, 0x80, 0x80, 0x80,
94	0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf
95	*/
96	};
97
98	template<>
99	wchar_t test_data<4>::wchar_encoding[] = {
100	(wchar_t)0x00000001,
101	(wchar_t)0x0000007f,
102	(wchar_t)0x00000080,
103	(wchar_t)0x000007ff,
104	(wchar_t)0x00000800,
105	(wchar_t)0x0000ffff,
106	(wchar_t)0x00010000,
107	(wchar_t)0x0010ffff,
108	/* codecvt implementations for clang and gcc don't handle more than 21 bits and
109	* return eof accordingly. So don't test the whole 32-bit range
110	*/
111	/*
112	(wchar_t)0x001fffff,
113	(wchar_t)0x00200000,
114	(wchar_t)0x03ffffff,
115	(wchar_t)0x04000000,
116	(wchar_t)0x7fffffff
117	*/
118	};
119
120	int
121	test_main(int /* argc */, char * /* argv */[]) {
122	std::locale utf8_locale
123	= std::locale(
124	std::locale::classic(),
125	new boost::detail::utf8_codecvt_facet
126	);
127
128	typedef char utf8_t;
129	// define test data compatible with the wchar_t implementation
130	// as either ucs-2 or ucs-4 depending on the compiler/library.
131	typedef test_data<sizeof(wchar_t)> td;
132
133	// Send our test UTF-8 data to file
134	{
135	std::ofstream ofs;
136	ofs.open("test.dat");
137	std::copy(
138	td::utf8_encoding,
139	td::utf8_encoding + sizeof(td::utf8_encoding) / sizeof(unsigned char),
140	std::ostream_iterator<utf8_t>(ofs)
141	);
142	}
143
144	// Read the test data back in, converting to UCS-4 on the way in
145	std::vector<wchar_t> from_file;
146	{
147	std::wifstream ifs;
148	ifs.imbue(utf8_locale);
149	ifs.open("test.dat");
150
151	std::wint_t item = 0;
152	// note can't use normal vector from iterator constructor because
153	// dinkumware doesn't have it.
154	for(;;){
155	item = ifs.get();
156	if(item == WEOF)
157	break;
158	//ifs >> item;
159	//if(ifs.eof())
160	// break;
161	from_file.push_back(item);
162	}
163	}
164
165	BOOST_TEST(std::equal(from_file.begin(), from_file.end(), td::wchar_encoding));
166
167	// Send the UCS4_data back out, converting to UTF-8
168	{
169	std::wofstream ofs;
170	ofs.imbue(utf8_locale);
171	ofs.open("test2.dat");
172	std::copy(
173	from_file.begin(),
174	from_file.end(),
175	std::ostream_iterator<wchar_t, wchar_t>(ofs)
176	);
177	}
178
179	// Make sure that both files are the same
180	{
181	typedef std::istream_iterator<utf8_t> is_iter;
182	is_iter end_iter;
183
184	std::ifstream ifs1("test.dat");
185	is_iter it1(ifs1);
186	std::vector<utf8_t> data1;
187	std::copy(it1, end_iter, std::back_inserter(data1));
188
189	std::ifstream ifs2("test2.dat");
190	is_iter it2(ifs2);
191	std::vector<utf8_t> data2;
192	std::copy(it2, end_iter, std::back_inserter(data2));
193
194	BOOST_TEST(data1 == data2);
195	}
196
197	// some libraries have trouble that only shows up with longer strings
198
199	const wchar_t * test3_data = L"\
200	<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>\
201	<!DOCTYPE boost_serialization>\
202	<boost_serialization signature=\"serialization::archive\" version=\"3\">\
203	<a class_id=\"0\" tracking_level=\"0\">\
204	<b>1</b>\
205	<f>96953204</f>\
206	<g>177129195</g>\
207	<l>1</l>\
208	<m>5627</m>\
209	<n>23010</n>\
210	<o>7419</o>\
211	<p>16212</p>\
212	<q>4086</q>\
213	<r>2749</r>\
214	<c>-33</c>\
215	<s>124</s>\
216	<t>28</t>\
217	<u>32225</u>\
218	<v>17543</v>\
219	<w>0.84431422</w>\
220	<x>1.0170664757130923</x>\
221	<y>tjbx</y>\
222	<z>cuwjentqpkejp</z>\
223	</a>\
224	</boost_serialization>\
225	";
226
227	// Send the UCS4_data back out, converting to UTF-8
228	std::size_t l = std::wcslen(test3_data);
229	{
230	std::wofstream ofs;
231	ofs.imbue(utf8_locale);
232	ofs.open("test3.dat");
233	std::copy(
234	test3_data,
235	test3_data + l,
236	std::ostream_iterator<wchar_t, wchar_t>(ofs)
237	);
238	}
239
240	// Make sure that both files are the same
241	{
242	std::wifstream ifs;
243	ifs.imbue(utf8_locale);
244	ifs.open("test3.dat");
245	ifs >> std::noskipws;
246	BOOST_TEST(
247	std::equal(
248	test3_data,
249	test3_data + l,
250	std::istream_iterator<wchar_t, wchar_t>(ifs)
251	)
252	);
253	}
254
255	// Test length calculation
256	{
257	std::codecvt<wchar_t, char, std::mbstate_t> const& fac = std::use_facet< std::codecvt<wchar_t, char, std::mbstate_t> >(utf8_locale);
258	std::mbstate_t mbs = std::mbstate_t();
259	const int utf8_len = sizeof(td::utf8_encoding) / sizeof(*td::utf8_encoding);
260	int res = fac.length(mbs, reinterpret_cast< const char* >(td::utf8_encoding), reinterpret_cast< const char* >(td::utf8_encoding + utf8_len), ~static_cast< std::size_t >(0u));
261	BOOST_TEST_EQ(utf8_len, res);
262	}
263
b32b8144 FG	264	// Test that length calculation detects character boundaries
	265	{
	266	std::codecvt<wchar_t, char, std::mbstate_t> const& fac = std::use_facet< std::codecvt<wchar_t, char, std::mbstate_t> >(utf8_locale);
	267	std::mbstate_t mbs = std::mbstate_t();
	268	// The first 5 bytes of utf8_encoding contain 3 complete UTF-8 characters (taking 4 bytes in total) and 1 byte of an incomplete character.
	269	// This last byte should not be accounted by length().
	270	const int input_len = 5;
	271	const int utf8_len = 4;
	272	int res = fac.length(mbs, reinterpret_cast< const char* >(td::utf8_encoding), reinterpret_cast< const char* >(td::utf8_encoding + input_len), ~static_cast< std::size_t >(0u));
	273	BOOST_TEST_EQ(utf8_len, res);
	274	}
	275
7c673cae FG	276	return EXIT_SUCCESS;
	277	}
	278
	279	int
	280	main(int argc, char * argv[]){
	281
	282	int retval = 1;
	283	BOOST_TRY{
	284	retval = test_main(argc, argv);
	285	}
	286	#ifndef BOOST_NO_EXCEPTION_STD_NAMESPACE
	287	BOOST_CATCH(const std::exception & e){
	288	BOOST_ERROR(e.what());
	289	}
	290	#endif
	291	BOOST_CATCH(...){
	292	BOOST_ERROR("failed with uncaught exception:");
	293	}
	294	BOOST_CATCH_END
	295
	296	int error_count = boost::report_errors();
	297	if(error_count > 0)
	298	retval = error_count;
	299	return retval;
	300	}
	301