[ceph.git] / ceph / src / utf8proc / test / case.c

#include "tests.h"
#include <stdlib.h>
#include <wctype.h>

/*
 * Test driver for the utf8proc case-mapping functions.
 *
 * For every code point in [0, 0x110000] (the upper bound is one past the
 * last Unicode code point, so an out-of-range input is exercised too) it:
 *   - verifies that tolower/toupper/totitle return either the input itself
 *     or a valid code point;
 *   - verifies that the titlecase mapping is consistent with the
 *     lower/uppercase mappings;
 *   - cross-checks the lower/uppercase mappings against the C library's
 *     towlower/towupper where wint_t can represent the code point.
 * It then checks the U+00DF / U+1E9E special cases (issue #130).
 *
 * argc and argv are unused. Returns 0 when the end of main is reached;
 * failures are reported through check() (declared in tests.h), which is
 * presumably fatal -- confirm against tests.h.
 */
int main(int argc, char **argv)
{
     int error = 0, better = 0;
     utf8proc_int32_t c;
     utf8proc_uint8_t *folded_00df, *folded_1e9e;

     (void) argc; /* unused */
     (void) argv; /* unused */

     /* some simple sanity tests of the case mappings */
     for (c = 0; c <= 0x110000; ++c) {
          utf8proc_int32_t l = utf8proc_tolower(c);
          utf8proc_int32_t u = utf8proc_toupper(c);
          utf8proc_int32_t t = utf8proc_totitle(c);
          /* Unicode 11: Georgian Mkhedruli chars have uppercase but no titlecase. */
          int mkhedruli = (c >= 0x10d0 && c <= 0x10fa) ||
                          (c >= 0x10fd && c <= 0x10ff);

          check(l == c || utf8proc_codepoint_valid(l), "invalid tolower");
          check(u == c || utf8proc_codepoint_valid(u), "invalid toupper");
          check(t == c || utf8proc_codepoint_valid(t), "invalid totitle");

          /* The titlecase form should differ from the lowercase form
             exactly when the uppercase form does; the only tolerated
             exception is the Mkhedruli range with a distinct uppercase. */
          if (utf8proc_codepoint_valid(c) && (l == u) != (l == t) &&
              !(mkhedruli && l != u)) {
               fprintf(stderr, "unexpected titlecase %x for lowercase %x / uppercase %x\n", t, l, u);
               ++error;
          }

          /* Only consult the OS tables when wint_t can hold the code point. */
          if (sizeof(wint_t) > 2 || c < (1<<16)) {
               wint_t l0 = towlower(c), u0 = towupper(c);

               /* OS unicode tables may be out of date.  But if they
                  do have a lower/uppercase mapping, hopefully it
                  is correct? */
               if (l0 != c && l0 != l) {
                    fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n",
                            l, c, l0);
                    ++error;
               }
               else if (l0 != l) { /* often true for out-of-date OS unicode */
                    ++better;
                    /* printf("%x != towlower(%x) == %x\n", l, c, l0); */
               }
               if (u0 != c && u0 != u) {
                    fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n",
                            u, c, u0);
                    ++error;
               }
               else if (u0 != u) { /* often true for out-of-date OS unicode */
                    ++better;
                    /* printf("%x != towupper(%x) == %x\n", u, c, u0); */
               }
          }
     }
     check(!error, "utf8proc case conversion FAILED %d tests.", error);

     /* issue #130 */
     check(utf8proc_toupper(0x00df) == 0x1e9e &&
           utf8proc_totitle(0x00df) == 0x1e9e &&
           utf8proc_tolower(0x00df) == 0x00df &&
           utf8proc_tolower(0x1e9e) == 0x00df &&
           utf8proc_toupper(0x1e9e) == 0x1e9e,
           "incorrect 0x00df/0x1e9e case conversions");
     utf8proc_uint8_t str_00df[] = {0xc3, 0x9f, 0x00};
     utf8proc_uint8_t str_1e9e[] = {0xe1, 0xba, 0x9e, 0x00};
     /* Per the utf8proc API documentation, utf8proc_NFKC_Casefold returns
        a newly allocated string (NULL on error) that the caller must
        free; guard against NULL before comparing and release the
        buffers afterwards. */
     folded_00df = utf8proc_NFKC_Casefold(str_00df);
     folded_1e9e = utf8proc_NFKC_Casefold(str_1e9e);
     check(folded_00df != NULL && folded_1e9e != NULL &&
           !strcmp((char*)folded_00df, "ss") &&
           !strcmp((char*)folded_1e9e, "ss"),
           "incorrect 0x00df/0x1e9e casefold normalization");
     free(folded_00df);
     free(folded_1e9e);

     printf("More up-to-date than OS unicode tables for %d tests.\n", better);
     printf("utf8proc case conversion tests SUCCEEDED.\n");
     return 0;
}
Commit	Line	Data
1d09f67e TL	1	#include "tests.h"
	2	#include <wctype.h>
	3
	4	int main(int argc, char **argv)
	5	{
	6	int error = 0, better = 0;
	7	utf8proc_int32_t c;
	8
	9	(void) argc; /* unused */
	10	(void) argv; /* unused */
	11
	12	/* some simple sanity tests of the character widths */
	13	for (c = 0; c <= 0x110000; ++c) {
	14	utf8proc_int32_t l = utf8proc_tolower(c);
	15	utf8proc_int32_t u = utf8proc_toupper(c);
	16	utf8proc_int32_t t = utf8proc_totitle(c);
	17
	18	check(l == c \|\| utf8proc_codepoint_valid(l), "invalid tolower");
	19	check(u == c \|\| utf8proc_codepoint_valid(u), "invalid toupper");
	20	check(t == c \|\| utf8proc_codepoint_valid(t), "invalid totitle");
	21
	22	if (utf8proc_codepoint_valid(c) && (l == u) != (l == t) &&
	23	/* Unicode 11: Georgian Mkhedruli chars have uppercase but no titlecase. */
	24	!(((c >= 0x10d0 && c <= 0x10fa) \|\| c >= (0x10fd && c <= 0x10ff)) && l != u)) {
	25	fprintf(stderr, "unexpected titlecase %x for lowercase %x / uppercase %x\n", t, l, c);
	26	++error;
	27	}
	28
	29	if (sizeof(wint_t) > 2 \|\| c < (1<<16)) {
	30	wint_t l0 = towlower(c), u0 = towupper(c);
	31
	32	/* OS unicode tables may be out of date. But if they
	33	do have a lower/uppercase mapping, hopefully it
	34	is correct? */
	35	if (l0 != c && l0 != l) {
	36	fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n",
	37	l, c, l0);
	38	++error;
	39	}
	40	else if (l0 != l) { /* often true for out-of-date OS unicode */
	41	++better;
	42	/* printf("%x != towlower(%x) == %x\n", l, c, l0); */
	43	}
	44	if (u0 != c && u0 != u) {
	45	fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n",
	46	u, c, u0);
	47	++error;
	48	}
	49	else if (u0 != u) { /* often true for out-of-date OS unicode */
	50	++better;
	51	/* printf("%x != towupper(%x) == %x\n", u, c, u0); */
	52	}
	53	}
	54	}
	55	check(!error, "utf8proc case conversion FAILED %d tests.", error);
	56
	57	/* issue #130 */
	58	check(utf8proc_toupper(0x00df) == 0x1e9e &&
	59	utf8proc_totitle(0x00df) == 0x1e9e &&
	60	utf8proc_tolower(0x00df) == 0x00df &&
	61	utf8proc_tolower(0x1e9e) == 0x00df &&
	62	utf8proc_toupper(0x1e9e) == 0x1e9e,
	63	"incorrect 0x00df/0x1e9e case conversions");
	64	utf8proc_uint8_t str_00df[] = {0xc3, 0x9f, 0x00};
65	utf8proc_uint8_t str_1e9e[] = {0xe1, 0xba, 0x9e, 0x00};
66	check(!strcmp((char*)utf8proc_NFKC_Casefold(str_00df), "ss") &&
67	!strcmp((char*)utf8proc_NFKC_Casefold(str_1e9e), "ss"),
68	"incorrect 0x00df/0x1e9e casefold normalization");
69
70	printf("More up-to-date than OS unicode tables for %d tests.\n", better);
71	printf("utf8proc case conversion tests SUCCEEDED.\n");
72	return 0;
73	}