ceph/src/boost/libs/locale/test/test_codecvt.cpp

   1 //
   2 //  Copyright (c) 2015 Artyom Beilis (Tonkikh)
   3 //
   4 //  Distributed under the Boost Software License, Version 1.0. (See
   5 //  accompanying file LICENSE_1_0.txt or copy at
   6 //  http://www.boost.org/LICENSE_1_0.txt)
   7 //
   8 #include <boost/locale/utf8_codecvt.hpp>
   9 #include <locale>
  10 #include <iostream>
  11 #include <iomanip>
  12 #include <string.h>
  13 #include <memory.h>
  14 #define BOOST_LOCALE_ERROR_LIMIT -1
  15 #include "test_locale.hpp"
  16
// Sample file name given as raw UTF-8 bytes: a 4-byte sequence, 2-byte
// sequences, 3-byte sequences and plain ASCII separated by '-'.
static char const *utf8_name = "\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt";
// The same code points spelled with universal character names as a wide
// string; the conversion tests compare the two representations.
static wchar_t const *wide_name = L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042.txt";
  19
  20 char const *res(std::codecvt_base::result r)
  21 {
  22     switch(r){
  23     case std::codecvt_base::ok: return "ok";
  24     case std::codecvt_base::partial: return "partial";
  25     case std::codecvt_base::error: return "error";
  26     case std::codecvt_base::noconv: return "noconv";
  27     default:
  28         return "error";
  29     }
  30 }
  31
// Facet type (wchar_t internal, char external) that the conversion and error
// tests obtain from the locale via std::use_facet.
typedef std::codecvt<wchar_t,char,std::mbstate_t> cvt_type;
  33
  34 void test_codecvt_in_n_m(cvt_type const &cvt,int n,int m)
  35 {
  36     wchar_t const *wptr = wide_name;
  37     int wlen = wcslen(wide_name);
  38     int u8len = strlen(utf8_name);
  39     char const *from = utf8_name;
  40     char const *end = from;
  41     char const *real_end = utf8_name + u8len;
  42     char const *from_next = from;
  43     std::mbstate_t mb=std::mbstate_t();
  44     while(from_next < real_end) {
  45         if(from == end) {
  46             end = from + n;
  47             if(end > real_end)
  48                 end = real_end;
  49         }
  50
  51         wchar_t buf[128];
  52         wchar_t *to = buf;
  53         wchar_t *to_end = to + m;
  54         wchar_t *to_next = to;
  55
  56
  57         std::mbstate_t mb2 = mb;
  58         std::codecvt_base::result r = cvt.in(mb,from,end,from_next,to,to_end,to_next);
  59         //std::cout << "In from_size=" << (end-from) << " from move=" <<  (from_next - from) << " to move= " << to_next - to << " state = " << res(r) << std::endl;
  60
  61         int count = cvt.length(mb2,from,end,to_end - to);
  62         #ifndef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST
  63         TEST(memcmp(&mb,&mb2,sizeof(mb))==0);
  64         if(count != from_next - from) {
  65             std::cout << count << " " << from_next - from << std::endl;
  66         }
  67         TEST(count == from_next - from);
  68         #else
  69         TEST(count == to_next - to);
  70         #endif
  71
  72
  73         if(r == cvt_type::partial) {
  74             end+=n;
  75             if(end > real_end)
  76                 end = real_end;
  77         }
  78         else
  79             TEST(r == cvt_type::ok);
  80         while(to!=to_next) {
  81             TEST(*wptr == *to);
  82             wptr++;
  83             to++;
  84         }
  85         to=to_next;
  86         from = from_next;
  87     }
  88     TEST(wptr == wide_name + wlen);
  89     TEST(from == real_end);
  90
  91 }
  92
  93 void test_codecvt_out_n_m(cvt_type const &cvt,int n,int m)
  94 {
  95     char const *nptr = utf8_name;
  96     int wlen = wcslen(wide_name);
  97     int u8len = strlen(utf8_name);
  98
  99     std::mbstate_t mb=std::mbstate_t();
 100
 101     wchar_t const *from_next = wide_name;
 102     wchar_t const *real_from_end = wide_name + wlen;
 103
 104     char buf[256];
 105     char *to = buf;
 106     char *to_next = to;
 107     char *to_end = to + n;
 108     char *real_to_end = buf + sizeof(buf);
 109
 110     while(from_next < real_from_end) {
 111         wchar_t const *from = from_next;
 112         wchar_t const *from_end = from + m;
 113         if(from_end > real_from_end)
 114             from_end = real_from_end;
 115         if(to_end == to) {
 116             to_end = to+n;
 117         }
 118
 119         std::codecvt_base::result r = cvt.out(mb,from,from_end,from_next,to,to_end,to_next);
 120         //std::cout << "In from_size=" << (end-from) << " from move=" <<  (from_next - from) << " to move= " << to_next - to << " state = " << res(r) << std::endl;
 121         if(r == cvt_type::partial) {
 122             TEST(to_end - to_next < cvt.max_length());
 123             to_end += n;
 124             if(to_end > real_to_end)
 125                 to_end = real_to_end;
 126         }
 127         else {
 128             TEST(r == cvt_type::ok);
 129         }
 130
 131         while(to!=to_next) {
 132             TEST(*nptr == *to);
 133             nptr++;
 134             to++;
 135         }
 136         from = from_next;
 137     }
 138     TEST(nptr == utf8_name + u8len);
 139     TEST(from_next == real_from_end);
 140     TEST(cvt.unshift(mb,to,to+n,to_next)==cvt_type::ok);
 141     TEST(to_next == to);
 142
 143 }
 144
 145
 146 void test_codecvt_conv()
 147 {
 148     std::cout << "Conversions " << std::endl;
 149     std::locale l(std::locale::classic(),new boost::locale::utf8_codecvt<wchar_t>());
 150
 151     cvt_type const &cvt = std::use_facet<cvt_type>(l);
 152
 153     TEST(cvt.max_length()==4);
 154
 155     for(int i=1;i<=(int)strlen(utf8_name)+1;i++) {
 156         for(int j=1;j<=(int)wcslen(wide_name)+1;j++) {
 157             try {
 158                 test_codecvt_in_n_m(cvt,i,j);
 159                 test_codecvt_out_n_m(cvt,i,j);
 160             }
 161             catch(...) {
 162                 std::cerr << "Wlen=" <<j << " Nlen=" << i << std::endl;
 163                 throw;
 164             }
 165         }
 166     }
 167 }
 168
 169 void test_codecvt_err()
 170 {
 171     std::cout << "Errors " << std::endl;
 172     std::locale l(std::locale::classic(),new boost::locale::utf8_codecvt<wchar_t>());
 173
 174     cvt_type const &cvt = std::use_facet<cvt_type>(l);
 175
 176     std::cout << "- UTF-8" << std::endl;
 177     {
 178
 179         wchar_t buf[2];
 180         wchar_t *to=buf;
 181         wchar_t *to_end = buf+2;
 182         wchar_t *to_next = to;
 183         char const *err_utf="1\xFF\xFF";
 184         {
 185             std::mbstate_t mb=std::mbstate_t();
 186             char const *from=err_utf;
 187             char const *from_end = from + strlen(from);
 188             char const *from_next = from;
 189             to_next = to;
 190             TEST(cvt.in(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::error);
 191             TEST(from_next == from+1);
 192             TEST(to_next == to + 1);
 193             TEST(*to == '1');
 194         }
 195         err_utf++;
 196         {
 197             std::mbstate_t mb=std::mbstate_t();
 198             char const *from=err_utf;
 199             char const *from_end = from + strlen(from);
 200             char const *from_next = from;
 201             TEST(cvt.in(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::error);
 202             TEST(from_next == from);
 203             TEST(to_next == to);
 204         }
 205     }
 206
 207     std::cout << "- UTF-16/32" << std::endl;
 208     {
 209
 210         char buf[32];
 211         char *to=buf;
 212         char *to_end = buf+32;
 213         char *to_next = to;
 214         wchar_t err_buf[3] = { '1' , 0xDC9E }; // second surrogate not works both for UTF-16 and 32
 215         wchar_t const *err_utf = err_buf;
 216         {
 217             std::mbstate_t mb=std::mbstate_t();
 218             wchar_t const *from=err_utf;
 219             wchar_t const *from_end = from + wcslen(from);
 220             wchar_t const *from_next = from;
 221             TEST(cvt.out(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::error);
 222             TEST(from_next == from+1);
 223             TEST(to_next == to + 1);
 224             TEST(*to == '1');
 225         }
 226         err_utf++;
 227         {
 228             std::mbstate_t mb=std::mbstate_t();
 229             wchar_t const *from=err_utf;
 230             wchar_t const *from_end = from + wcslen(from);
 231             wchar_t const *from_next = from;
 232             to_next = to;
 233             TEST(cvt.out(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::error);
 234             TEST(from_next == from);
 235             TEST(to_next == to);
 236         }
 237     }
 238
 239 }
 240
 241
 242 void test_char_char()
 243 {
 244     std::cout << "Char-char specialization"<<std::endl;
 245     std::locale l(std::locale::classic(),new boost::locale::utf8_codecvt<char>());
 246     std::codecvt<char,char,std::mbstate_t> const &cvt=std::use_facet<std::codecvt<char,char,std::mbstate_t> >(l);
 247     std::mbstate_t mb=std::mbstate_t();
 248     char const *from = "a";
 249     char const *from_end = from+1;
 250     char const *from_next = from;
 251     char buf[2];
 252     char *to = buf;
 253     char *to_end = buf+1;
 254     char *to_next = to;
 255     TEST(cvt.always_noconv()==true);
 256     TEST(cvt.in(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::noconv);
 257     TEST(from_next == from);
 258     TEST(to_next == to);
 259     TEST(cvt.out(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::noconv);
 260     TEST(from_next == from);
 261     TEST(to_next == to);
 262     TEST(cvt.encoding()==1);
 263     TEST(cvt.max_length()==1);
 264 }
 265
 266 int main()
 267 {
 268     try {
 269         test_codecvt_conv();
 270         test_codecvt_err();
 271         test_char_char();
 272
 273     }
 274     catch(std::exception const &e) {
 275         std::cerr << "Failed : " << e.what() << std::endl;
 276         return 1;
 277     }
 278     std::cout << "Ok" << std::endl;
 279     return 0;
 280 }
 281 ///
 282 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4