+++ /dev/null
-/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd\r
- See the file COPYING for copying permission.\r
-*/\r
-\r
-/* This file is included! */\r
-#ifdef XML_TOK_IMPL_C\r
-\r
-#ifndef IS_INVALID_CHAR\r
-#define IS_INVALID_CHAR(enc, ptr, n) (0)\r
-#endif\r
-\r
-#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \\r
- case BT_LEAD ## n: \\r
- if (end - ptr < n) \\r
- return XML_TOK_PARTIAL_CHAR; \\r
- if (IS_INVALID_CHAR(enc, ptr, n)) { \\r
- *(nextTokPtr) = (ptr); \\r
- return XML_TOK_INVALID; \\r
- } \\r
- ptr += n; \\r
- break;\r
-\r
-#define INVALID_CASES(ptr, nextTokPtr) \\r
- INVALID_LEAD_CASE(2, ptr, nextTokPtr) \\r
- INVALID_LEAD_CASE(3, ptr, nextTokPtr) \\r
- INVALID_LEAD_CASE(4, ptr, nextTokPtr) \\r
- case BT_NONXML: \\r
- case BT_MALFORM: \\r
- case BT_TRAIL: \\r
- *(nextTokPtr) = (ptr); \\r
- return XML_TOK_INVALID;\r
-\r
-#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \\r
- case BT_LEAD ## n: \\r
- if (end - ptr < n) \\r
- return XML_TOK_PARTIAL_CHAR; \\r
- if (!IS_NAME_CHAR(enc, ptr, n)) { \\r
- *nextTokPtr = ptr; \\r
- return XML_TOK_INVALID; \\r
- } \\r
- ptr += n; \\r
- break;\r
-\r
-#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \\r
- case BT_NONASCII: \\r
- if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \\r
- *nextTokPtr = ptr; \\r
- return XML_TOK_INVALID; \\r
- } \\r
- case BT_NMSTRT: \\r
- case BT_HEX: \\r
- case BT_DIGIT: \\r
- case BT_NAME: \\r
- case BT_MINUS: \\r
- ptr += MINBPC(enc); \\r
- break; \\r
- CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \\r
- CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \\r
- CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)\r
-\r
-#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \\r
- case BT_LEAD ## n: \\r
- if (end - ptr < n) \\r
- return XML_TOK_PARTIAL_CHAR; \\r
- if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \\r
- *nextTokPtr = ptr; \\r
- return XML_TOK_INVALID; \\r
- } \\r
- ptr += n; \\r
- break;\r
-\r
-#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \\r
- case BT_NONASCII: \\r
- if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \\r
- *nextTokPtr = ptr; \\r
- return XML_TOK_INVALID; \\r
- } \\r
- case BT_NMSTRT: \\r
- case BT_HEX: \\r
- ptr += MINBPC(enc); \\r
- break; \\r
- CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \\r
- CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \\r
- CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)\r
-\r
-#ifndef PREFIX\r
-#define PREFIX(ident) ident\r
-#endif\r
-\r
-/* ptr points to character following "<!-" */\r
-\r
-static int PTRCALL\r
-PREFIX(scanComment)(const ENCODING *enc, const char *ptr,\r
- const char *end, const char **nextTokPtr)\r
-{\r
- if (ptr != end) {\r
- if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- ptr += MINBPC(enc);\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- INVALID_CASES(ptr, nextTokPtr)\r
- case BT_MINUS:\r
- if ((ptr += MINBPC(enc)) == end)\r
- return XML_TOK_PARTIAL;\r
- if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {\r
- if ((ptr += MINBPC(enc)) == end)\r
- return XML_TOK_PARTIAL;\r
- if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_COMMENT;\r
- }\r
- break;\r
- default:\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
-}\r
-\r
-/* ptr points to character following "<!" */\r
-\r
-static int PTRCALL\r
-PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,\r
- const char *end, const char **nextTokPtr)\r
-{\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_MINUS:\r
- return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- case BT_LSQB:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_COND_SECT_OPEN;\r
- case BT_NMSTRT:\r
- case BT_HEX:\r
- ptr += MINBPC(enc);\r
- break;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_PERCNT:\r
- if (ptr + MINBPC(enc) == end)\r
- return XML_TOK_PARTIAL;\r
- /* don't allow <!ENTITY% foo "whatever"> */\r
- switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {\r
- case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- /* fall through */\r
- case BT_S: case BT_CR: case BT_LF:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DECL_OPEN;\r
- case BT_NMSTRT:\r
- case BT_HEX:\r
- ptr += MINBPC(enc);\r
- break;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
-}\r
-\r
-static int PTRCALL\r
-PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,\r
- const char *end, int *tokPtr)\r
-{\r
- int upper = 0;\r
- *tokPtr = XML_TOK_PI;\r
- if (end - ptr != MINBPC(enc)*3)\r
- return 1;\r
- switch (BYTE_TO_ASCII(enc, ptr)) {\r
- case ASCII_x:\r
- break;\r
- case ASCII_X:\r
- upper = 1;\r
- break;\r
- default:\r
- return 1;\r
- }\r
- ptr += MINBPC(enc);\r
- switch (BYTE_TO_ASCII(enc, ptr)) {\r
- case ASCII_m:\r
- break;\r
- case ASCII_M:\r
- upper = 1;\r
- break;\r
- default:\r
- return 1;\r
- }\r
- ptr += MINBPC(enc);\r
- switch (BYTE_TO_ASCII(enc, ptr)) {\r
- case ASCII_l:\r
- break;\r
- case ASCII_L:\r
- upper = 1;\r
- break;\r
- default:\r
- return 1;\r
- }\r
- if (upper)\r
- return 0;\r
- *tokPtr = XML_TOK_XML_DECL;\r
- return 1;\r
-}\r
-\r
-/* ptr points to character following "<?" */\r
-\r
-static int PTRCALL\r
-PREFIX(scanPi)(const ENCODING *enc, const char *ptr,\r
- const char *end, const char **nextTokPtr)\r
-{\r
- int tok;\r
- const char *target = ptr;\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)\r
- case BT_S: case BT_CR: case BT_LF:\r
- if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- ptr += MINBPC(enc);\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- INVALID_CASES(ptr, nextTokPtr)\r
- case BT_QUEST:\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return tok;\r
- }\r
- break;\r
- default:\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
- case BT_QUEST:\r
- if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return tok;\r
- }\r
- /* fall through */\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
-}\r
-\r
-static int PTRCALL\r
-PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,\r
- const char *end, const char **nextTokPtr)\r
-{\r
- static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,\r
- ASCII_T, ASCII_A, ASCII_LSQB };\r
- int i;\r
- /* CDATA[ */\r
- if (end - ptr < 6 * MINBPC(enc))\r
- return XML_TOK_PARTIAL;\r
- for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {\r
- if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_CDATA_SECT_OPEN;\r
-}\r
-\r
-static int PTRCALL\r
-PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,\r
- const char *end, const char **nextTokPtr)\r
-{\r
- if (ptr == end)\r
- return XML_TOK_NONE;\r
- if (MINBPC(enc) > 1) {\r
- size_t n = end - ptr;\r
- if (n & (MINBPC(enc) - 1)) {\r
- n &= ~(MINBPC(enc) - 1);\r
- if (n == 0)\r
- return XML_TOK_PARTIAL;\r
- end = ptr + n;\r
- }\r
- }\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_RSQB:\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))\r
- break;\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {\r
- ptr -= MINBPC(enc);\r
- break;\r
- }\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_CDATA_SECT_CLOSE;\r
- case BT_CR:\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- if (BYTE_TYPE(enc, ptr) == BT_LF)\r
- ptr += MINBPC(enc);\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_NEWLINE;\r
- case BT_LF:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_DATA_NEWLINE;\r
- INVALID_CASES(ptr, nextTokPtr)\r
- default:\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
-#define LEAD_CASE(n) \\r
- case BT_LEAD ## n: \\r
- if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \\r
- *nextTokPtr = ptr; \\r
- return XML_TOK_DATA_CHARS; \\r
- } \\r
- ptr += n; \\r
- break;\r
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)\r
-#undef LEAD_CASE\r
- case BT_NONXML:\r
- case BT_MALFORM:\r
- case BT_TRAIL:\r
- case BT_CR:\r
- case BT_LF:\r
- case BT_RSQB:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
- default:\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
-}\r
-\r
-/* ptr points to character following "</" */\r
-\r
-static int PTRCALL\r
-PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,\r
- const char *end, const char **nextTokPtr)\r
-{\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)\r
- case BT_S: case BT_CR: case BT_LF:\r
- for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_S: case BT_CR: case BT_LF:\r
- break;\r
- case BT_GT:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_END_TAG;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
-#ifdef XML_NS\r
- case BT_COLON:\r
- /* no need to check qname syntax here,\r
- since end-tag must match exactly */\r
- ptr += MINBPC(enc);\r
- break;\r
-#endif\r
- case BT_GT:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_END_TAG;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
-}\r
-\r
-/* ptr points to character following "&#X" */\r
-\r
-static int PTRCALL\r
-PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,\r
- const char *end, const char **nextTokPtr)\r
-{\r
- if (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_DIGIT:\r
- case BT_HEX:\r
- break;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_DIGIT:\r
- case BT_HEX:\r
- break;\r
- case BT_SEMI:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_CHAR_REF;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
-}\r
-\r
-/* ptr points to character following "&#" */\r
-\r
-static int PTRCALL\r
-PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,\r
- const char *end, const char **nextTokPtr)\r
-{\r
- if (ptr != end) {\r
- if (CHAR_MATCHES(enc, ptr, ASCII_x))\r
- return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_DIGIT:\r
- break;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_DIGIT:\r
- break;\r
- case BT_SEMI:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_CHAR_REF;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
-}\r
-\r
-/* ptr points to character following "&" */\r
-\r
-static int PTRCALL\r
-PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,\r
- const char **nextTokPtr)\r
-{\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)\r
- case BT_NUM:\r
- return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)\r
- case BT_SEMI:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_ENTITY_REF;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
-}\r
-\r
-/* ptr points to character following first character of attribute name */\r
-\r
-static int PTRCALL\r
-PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,\r
- const char **nextTokPtr)\r
-{\r
-#ifdef XML_NS\r
- int hadColon = 0;\r
-#endif\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)\r
-#ifdef XML_NS\r
- case BT_COLON:\r
- if (hadColon) {\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- hadColon = 1;\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- break;\r
-#endif\r
- case BT_S: case BT_CR: case BT_LF:\r
- for (;;) {\r
- int t;\r
-\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- t = BYTE_TYPE(enc, ptr);\r
- if (t == BT_EQUALS)\r
- break;\r
- switch (t) {\r
- case BT_S:\r
- case BT_LF:\r
- case BT_CR:\r
- break;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- /* fall through */\r
- case BT_EQUALS:\r
- {\r
- int open;\r
-#ifdef XML_NS\r
- hadColon = 0;\r
-#endif\r
- for (;;) {\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- open = BYTE_TYPE(enc, ptr);\r
- if (open == BT_QUOT || open == BT_APOS)\r
- break;\r
- switch (open) {\r
- case BT_S:\r
- case BT_LF:\r
- case BT_CR:\r
- break;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- ptr += MINBPC(enc);\r
- /* in attribute value */\r
- for (;;) {\r
- int t;\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- t = BYTE_TYPE(enc, ptr);\r
- if (t == open)\r
- break;\r
- switch (t) {\r
- INVALID_CASES(ptr, nextTokPtr)\r
- case BT_AMP:\r
- {\r
- int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);\r
- if (tok <= 0) {\r
- if (tok == XML_TOK_INVALID)\r
- *nextTokPtr = ptr;\r
- return tok;\r
- }\r
- break;\r
- }\r
- case BT_LT:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- default:\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- }\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_S:\r
- case BT_CR:\r
- case BT_LF:\r
- break;\r
- case BT_SOL:\r
- goto sol;\r
- case BT_GT:\r
- goto gt;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- /* ptr points to closing quote */\r
- for (;;) {\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)\r
- case BT_S: case BT_CR: case BT_LF:\r
- continue;\r
- case BT_GT:\r
- gt:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_START_TAG_WITH_ATTS;\r
- case BT_SOL:\r
- sol:\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- break;\r
- }\r
- break;\r
- }\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
-}\r
-\r
-/* ptr points to character following "<" */\r
-\r
-static int PTRCALL\r
-PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,\r
- const char **nextTokPtr)\r
-{\r
-#ifdef XML_NS\r
- int hadColon;\r
-#endif\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)\r
- case BT_EXCL:\r
- if ((ptr += MINBPC(enc)) == end)\r
- return XML_TOK_PARTIAL;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_MINUS:\r
- return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- case BT_LSQB:\r
- return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),\r
- end, nextTokPtr);\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- case BT_QUEST:\r
- return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- case BT_SOL:\r
- return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
-#ifdef XML_NS\r
- hadColon = 0;\r
-#endif\r
- /* we have a start-tag */\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)\r
-#ifdef XML_NS\r
- case BT_COLON:\r
- if (hadColon) {\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- hadColon = 1;\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- break;\r
-#endif\r
- case BT_S: case BT_CR: case BT_LF:\r
- {\r
- ptr += MINBPC(enc);\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)\r
- case BT_GT:\r
- goto gt;\r
- case BT_SOL:\r
- goto sol;\r
- case BT_S: case BT_CR: case BT_LF:\r
- ptr += MINBPC(enc);\r
- continue;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);\r
- }\r
- return XML_TOK_PARTIAL;\r
- }\r
- case BT_GT:\r
- gt:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_START_TAG_NO_ATTS;\r
- case BT_SOL:\r
- sol:\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_EMPTY_ELEMENT_NO_ATTS;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
-}\r
-\r
-static int PTRCALL\r
-PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,\r
- const char **nextTokPtr)\r
-{\r
- if (ptr == end)\r
- return XML_TOK_NONE;\r
- if (MINBPC(enc) > 1) {\r
- size_t n = end - ptr;\r
- if (n & (MINBPC(enc) - 1)) {\r
- n &= ~(MINBPC(enc) - 1);\r
- if (n == 0)\r
- return XML_TOK_PARTIAL;\r
- end = ptr + n;\r
- }\r
- }\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_LT:\r
- return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- case BT_AMP:\r
- return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- case BT_CR:\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_TRAILING_CR;\r
- if (BYTE_TYPE(enc, ptr) == BT_LF)\r
- ptr += MINBPC(enc);\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_NEWLINE;\r
- case BT_LF:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_DATA_NEWLINE;\r
- case BT_RSQB:\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_TRAILING_RSQB;\r
- if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))\r
- break;\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_TRAILING_RSQB;\r
- if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {\r
- ptr -= MINBPC(enc);\r
- break;\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- INVALID_CASES(ptr, nextTokPtr)\r
- default:\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
-#define LEAD_CASE(n) \\r
- case BT_LEAD ## n: \\r
- if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \\r
- *nextTokPtr = ptr; \\r
- return XML_TOK_DATA_CHARS; \\r
- } \\r
- ptr += n; \\r
- break;\r
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)\r
-#undef LEAD_CASE\r
- case BT_RSQB:\r
- if (ptr + MINBPC(enc) != end) {\r
- if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- if (ptr + 2*MINBPC(enc) != end) {\r
- if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- *nextTokPtr = ptr + 2*MINBPC(enc);\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- /* fall through */\r
- case BT_AMP:\r
- case BT_LT:\r
- case BT_NONXML:\r
- case BT_MALFORM:\r
- case BT_TRAIL:\r
- case BT_CR:\r
- case BT_LF:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
- default:\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
-}\r
-\r
-/* ptr points to character following "%" */\r
-\r
-static int PTRCALL\r
-PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,\r
- const char **nextTokPtr)\r
-{\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)\r
- case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_PERCENT;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)\r
- case BT_SEMI:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_PARAM_ENTITY_REF;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
-}\r
-\r
-static int PTRCALL\r
-PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,\r
- const char **nextTokPtr)\r
-{\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)\r
- case BT_CR: case BT_LF: case BT_S:\r
- case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_POUND_NAME;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- return -XML_TOK_POUND_NAME;\r
-}\r
-\r
-static int PTRCALL\r
-PREFIX(scanLit)(int open, const ENCODING *enc,\r
- const char *ptr, const char *end,\r
- const char **nextTokPtr)\r
-{\r
- while (ptr != end) {\r
- int t = BYTE_TYPE(enc, ptr);\r
- switch (t) {\r
- INVALID_CASES(ptr, nextTokPtr)\r
- case BT_QUOT:\r
- case BT_APOS:\r
- ptr += MINBPC(enc);\r
- if (t != open)\r
- break;\r
- if (ptr == end)\r
- return -XML_TOK_LITERAL;\r
- *nextTokPtr = ptr;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_S: case BT_CR: case BT_LF:\r
- case BT_GT: case BT_PERCNT: case BT_LSQB:\r
- return XML_TOK_LITERAL;\r
- default:\r
- return XML_TOK_INVALID;\r
- }\r
- default:\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
-}\r
-\r
-static int PTRCALL\r
-PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,\r
- const char **nextTokPtr)\r
-{\r
- int tok;\r
- if (ptr == end)\r
- return XML_TOK_NONE;\r
- if (MINBPC(enc) > 1) {\r
- size_t n = end - ptr;\r
- if (n & (MINBPC(enc) - 1)) {\r
- n &= ~(MINBPC(enc) - 1);\r
- if (n == 0)\r
- return XML_TOK_PARTIAL;\r
- end = ptr + n;\r
- }\r
- }\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_QUOT:\r
- return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- case BT_APOS:\r
- return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- case BT_LT:\r
- {\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_EXCL:\r
- return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- case BT_QUEST:\r
- return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- case BT_NMSTRT:\r
- case BT_HEX:\r
- case BT_NONASCII:\r
- case BT_LEAD2:\r
- case BT_LEAD3:\r
- case BT_LEAD4:\r
- *nextTokPtr = ptr - MINBPC(enc);\r
- return XML_TOK_INSTANCE_START;\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- case BT_CR:\r
- if (ptr + MINBPC(enc) == end) {\r
- *nextTokPtr = end;\r
- /* indicate that this might be part of a CR/LF pair */\r
- return -XML_TOK_PROLOG_S;\r
- }\r
- /* fall through */\r
- case BT_S: case BT_LF:\r
- for (;;) {\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- break;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_S: case BT_LF:\r
- break;\r
- case BT_CR:\r
- /* don't split CR/LF pair */\r
- if (ptr + MINBPC(enc) != end)\r
- break;\r
- /* fall through */\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_PROLOG_S;\r
- }\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_PROLOG_S;\r
- case BT_PERCNT:\r
- return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- case BT_COMMA:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_COMMA;\r
- case BT_LSQB:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_OPEN_BRACKET;\r
- case BT_RSQB:\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return -XML_TOK_CLOSE_BRACKET;\r
- if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {\r
- if (ptr + MINBPC(enc) == end)\r
- return XML_TOK_PARTIAL;\r
- if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {\r
- *nextTokPtr = ptr + 2*MINBPC(enc);\r
- return XML_TOK_COND_SECT_CLOSE;\r
- }\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_CLOSE_BRACKET;\r
- case BT_LPAR:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_OPEN_PAREN;\r
- case BT_RPAR:\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return -XML_TOK_CLOSE_PAREN;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_AST:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_CLOSE_PAREN_ASTERISK;\r
- case BT_QUEST:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_CLOSE_PAREN_QUESTION;\r
- case BT_PLUS:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_CLOSE_PAREN_PLUS;\r
- case BT_CR: case BT_LF: case BT_S:\r
- case BT_GT: case BT_COMMA: case BT_VERBAR:\r
- case BT_RPAR:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_CLOSE_PAREN;\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- case BT_VERBAR:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_OR;\r
- case BT_GT:\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_DECL_CLOSE;\r
- case BT_NUM:\r
- return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
-#define LEAD_CASE(n) \\r
- case BT_LEAD ## n: \\r
- if (end - ptr < n) \\r
- return XML_TOK_PARTIAL_CHAR; \\r
- if (IS_NMSTRT_CHAR(enc, ptr, n)) { \\r
- ptr += n; \\r
- tok = XML_TOK_NAME; \\r
- break; \\r
- } \\r
- if (IS_NAME_CHAR(enc, ptr, n)) { \\r
- ptr += n; \\r
- tok = XML_TOK_NMTOKEN; \\r
- break; \\r
- } \\r
- *nextTokPtr = ptr; \\r
- return XML_TOK_INVALID;\r
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)\r
-#undef LEAD_CASE\r
- case BT_NMSTRT:\r
- case BT_HEX:\r
- tok = XML_TOK_NAME;\r
- ptr += MINBPC(enc);\r
- break;\r
- case BT_DIGIT:\r
- case BT_NAME:\r
- case BT_MINUS:\r
-#ifdef XML_NS\r
- case BT_COLON:\r
-#endif\r
- tok = XML_TOK_NMTOKEN;\r
- ptr += MINBPC(enc);\r
- break;\r
- case BT_NONASCII:\r
- if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {\r
- ptr += MINBPC(enc);\r
- tok = XML_TOK_NAME;\r
- break;\r
- }\r
- if (IS_NAME_CHAR_MINBPC(enc, ptr)) {\r
- ptr += MINBPC(enc);\r
- tok = XML_TOK_NMTOKEN;\r
- break;\r
- }\r
- /* fall through */\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)\r
- case BT_GT: case BT_RPAR: case BT_COMMA:\r
- case BT_VERBAR: case BT_LSQB: case BT_PERCNT:\r
- case BT_S: case BT_CR: case BT_LF:\r
- *nextTokPtr = ptr;\r
- return tok;\r
-#ifdef XML_NS\r
- case BT_COLON:\r
- ptr += MINBPC(enc);\r
- switch (tok) {\r
- case XML_TOK_NAME:\r
- if (ptr == end)\r
- return XML_TOK_PARTIAL;\r
- tok = XML_TOK_PREFIXED_NAME;\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)\r
- default:\r
- tok = XML_TOK_NMTOKEN;\r
- break;\r
- }\r
- break;\r
- case XML_TOK_PREFIXED_NAME:\r
- tok = XML_TOK_NMTOKEN;\r
- break;\r
- }\r
- break;\r
-#endif\r
- case BT_PLUS:\r
- if (tok == XML_TOK_NMTOKEN) {\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_NAME_PLUS;\r
- case BT_AST:\r
- if (tok == XML_TOK_NMTOKEN) {\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_NAME_ASTERISK;\r
- case BT_QUEST:\r
- if (tok == XML_TOK_NMTOKEN) {\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_NAME_QUESTION;\r
- default:\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- }\r
- }\r
- return -tok;\r
-}\r
-\r
-static int PTRCALL\r
-PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,\r
- const char *end, const char **nextTokPtr)\r
-{\r
- const char *start;\r
- if (ptr == end)\r
- return XML_TOK_NONE;\r
- start = ptr;\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
-#define LEAD_CASE(n) \\r
- case BT_LEAD ## n: ptr += n; break;\r
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)\r
-#undef LEAD_CASE\r
- case BT_AMP:\r
- if (ptr == start)\r
- return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
- case BT_LT:\r
- /* this is for inside entity references */\r
- *nextTokPtr = ptr;\r
- return XML_TOK_INVALID;\r
- case BT_LF:\r
- if (ptr == start) {\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_DATA_NEWLINE;\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
- case BT_CR:\r
- if (ptr == start) {\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_TRAILING_CR;\r
- if (BYTE_TYPE(enc, ptr) == BT_LF)\r
- ptr += MINBPC(enc);\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_NEWLINE;\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
- case BT_S:\r
- if (ptr == start) {\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_ATTRIBUTE_VALUE_S;\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
- default:\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
-}\r
-\r
-static int PTRCALL\r
-PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,\r
- const char *end, const char **nextTokPtr)\r
-{\r
- const char *start;\r
- if (ptr == end)\r
- return XML_TOK_NONE;\r
- start = ptr;\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
-#define LEAD_CASE(n) \\r
- case BT_LEAD ## n: ptr += n; break;\r
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)\r
-#undef LEAD_CASE\r
- case BT_AMP:\r
- if (ptr == start)\r
- return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
- case BT_PERCNT:\r
- if (ptr == start) {\r
- int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),\r
- end, nextTokPtr);\r
- return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
- case BT_LF:\r
- if (ptr == start) {\r
- *nextTokPtr = ptr + MINBPC(enc);\r
- return XML_TOK_DATA_NEWLINE;\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
- case BT_CR:\r
- if (ptr == start) {\r
- ptr += MINBPC(enc);\r
- if (ptr == end)\r
- return XML_TOK_TRAILING_CR;\r
- if (BYTE_TYPE(enc, ptr) == BT_LF)\r
- ptr += MINBPC(enc);\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_NEWLINE;\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
- default:\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- }\r
- *nextTokPtr = ptr;\r
- return XML_TOK_DATA_CHARS;\r
-}\r
-\r
-#ifdef XML_DTD\r
-\r
-static int PTRCALL\r
-PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,\r
- const char *end, const char **nextTokPtr)\r
-{\r
- int level = 0;\r
- if (MINBPC(enc) > 1) {\r
- size_t n = end - ptr;\r
- if (n & (MINBPC(enc) - 1)) {\r
- n &= ~(MINBPC(enc) - 1);\r
- end = ptr + n;\r
- }\r
- }\r
- while (ptr != end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- INVALID_CASES(ptr, nextTokPtr)\r
- case BT_LT:\r
- if ((ptr += MINBPC(enc)) == end)\r
- return XML_TOK_PARTIAL;\r
- if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {\r
- if ((ptr += MINBPC(enc)) == end)\r
- return XML_TOK_PARTIAL;\r
- if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {\r
- ++level;\r
- ptr += MINBPC(enc);\r
- }\r
- }\r
- break;\r
- case BT_RSQB:\r
- if ((ptr += MINBPC(enc)) == end)\r
- return XML_TOK_PARTIAL;\r
- if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {\r
- if ((ptr += MINBPC(enc)) == end)\r
- return XML_TOK_PARTIAL;\r
- if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {\r
- ptr += MINBPC(enc);\r
- if (level == 0) {\r
- *nextTokPtr = ptr;\r
- return XML_TOK_IGNORE_SECT;\r
- }\r
- --level;\r
- }\r
- }\r
- break;\r
- default:\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- }\r
- return XML_TOK_PARTIAL;\r
-}\r
-\r
-#endif /* XML_DTD */\r
-\r
-static int PTRCALL\r
-PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,\r
- const char **badPtr)\r
-{\r
- ptr += MINBPC(enc);\r
- end -= MINBPC(enc);\r
- for (; ptr != end; ptr += MINBPC(enc)) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_DIGIT:\r
- case BT_HEX:\r
- case BT_MINUS:\r
- case BT_APOS:\r
- case BT_LPAR:\r
- case BT_RPAR:\r
- case BT_PLUS:\r
- case BT_COMMA:\r
- case BT_SOL:\r
- case BT_EQUALS:\r
- case BT_QUEST:\r
- case BT_CR:\r
- case BT_LF:\r
- case BT_SEMI:\r
- case BT_EXCL:\r
- case BT_AST:\r
- case BT_PERCNT:\r
- case BT_NUM:\r
-#ifdef XML_NS\r
- case BT_COLON:\r
-#endif\r
- break;\r
- case BT_S:\r
- if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {\r
- *badPtr = ptr;\r
- return 0;\r
- }\r
- break;\r
- case BT_NAME:\r
- case BT_NMSTRT:\r
- if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))\r
- break;\r
- default:\r
- switch (BYTE_TO_ASCII(enc, ptr)) {\r
- case 0x24: /* $ */\r
- case 0x40: /* @ */\r
- break;\r
- default:\r
- *badPtr = ptr;\r
- return 0;\r
- }\r
- break;\r
- }\r
- }\r
- return 1;\r
-}\r
-\r
-/* This must only be called for a well-formed start-tag or empty\r
- element tag. Returns the number of attributes. Pointers to the\r
- first attsMax attributes are stored in atts.\r
-*/\r
-\r
-static int PTRCALL\r
-PREFIX(getAtts)(const ENCODING *enc, const char *ptr,\r
- int attsMax, ATTRIBUTE *atts)\r
-{\r
- enum { other, inName, inValue } state = inName;\r
- int nAtts = 0;\r
- int open = 0; /* defined when state == inValue;\r
- initialization just to shut up compilers */\r
-\r
- for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
-#define START_NAME \\r
- if (state == other) { \\r
- if (nAtts < attsMax) { \\r
- atts[nAtts].name = ptr; \\r
- atts[nAtts].normalized = 1; \\r
- } \\r
- state = inName; \\r
- }\r
-#define LEAD_CASE(n) \\r
- case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;\r
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)\r
-#undef LEAD_CASE\r
- case BT_NONASCII:\r
- case BT_NMSTRT:\r
- case BT_HEX:\r
- START_NAME\r
- break;\r
-#undef START_NAME\r
- case BT_QUOT:\r
- if (state != inValue) {\r
- if (nAtts < attsMax)\r
- atts[nAtts].valuePtr = ptr + MINBPC(enc);\r
- state = inValue;\r
- open = BT_QUOT;\r
- }\r
- else if (open == BT_QUOT) {\r
- state = other;\r
- if (nAtts < attsMax)\r
- atts[nAtts].valueEnd = ptr;\r
- nAtts++;\r
- }\r
- break;\r
- case BT_APOS:\r
- if (state != inValue) {\r
- if (nAtts < attsMax)\r
- atts[nAtts].valuePtr = ptr + MINBPC(enc);\r
- state = inValue;\r
- open = BT_APOS;\r
- }\r
- else if (open == BT_APOS) {\r
- state = other;\r
- if (nAtts < attsMax)\r
- atts[nAtts].valueEnd = ptr;\r
- nAtts++;\r
- }\r
- break;\r
- case BT_AMP:\r
- if (nAtts < attsMax)\r
- atts[nAtts].normalized = 0;\r
- break;\r
- case BT_S:\r
- if (state == inName)\r
- state = other;\r
- else if (state == inValue\r
- && nAtts < attsMax\r
- && atts[nAtts].normalized\r
- && (ptr == atts[nAtts].valuePtr\r
- || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE\r
- || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE\r
- || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))\r
- atts[nAtts].normalized = 0;\r
- break;\r
- case BT_CR: case BT_LF:\r
- /* This case ensures that the first attribute name is counted\r
- Apart from that we could just change state on the quote. */\r
- if (state == inName)\r
- state = other;\r
- else if (state == inValue && nAtts < attsMax)\r
- atts[nAtts].normalized = 0;\r
- break;\r
- case BT_GT:\r
- case BT_SOL:\r
- if (state != inValue)\r
- return nAtts;\r
- break;\r
- default:\r
- break;\r
- }\r
- }\r
- /* not reached */\r
-}\r
-\r
-static int PTRFASTCALL\r
-PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)\r
-{\r
- int result = 0;\r
- /* skip &# */\r
- ptr += 2*MINBPC(enc);\r
- if (CHAR_MATCHES(enc, ptr, ASCII_x)) {\r
- for (ptr += MINBPC(enc);\r
- !CHAR_MATCHES(enc, ptr, ASCII_SEMI);\r
- ptr += MINBPC(enc)) {\r
- int c = BYTE_TO_ASCII(enc, ptr);\r
- switch (c) {\r
- case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:\r
- case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:\r
- result <<= 4;\r
- result |= (c - ASCII_0);\r
- break;\r
- case ASCII_A: case ASCII_B: case ASCII_C:\r
- case ASCII_D: case ASCII_E: case ASCII_F:\r
- result <<= 4;\r
- result += 10 + (c - ASCII_A);\r
- break;\r
- case ASCII_a: case ASCII_b: case ASCII_c:\r
- case ASCII_d: case ASCII_e: case ASCII_f:\r
- result <<= 4;\r
- result += 10 + (c - ASCII_a);\r
- break;\r
- }\r
- if (result >= 0x110000)\r
- return -1;\r
- }\r
- }\r
- else {\r
- for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {\r
- int c = BYTE_TO_ASCII(enc, ptr);\r
- result *= 10;\r
- result += (c - ASCII_0);\r
- if (result >= 0x110000)\r
- return -1;\r
- }\r
- }\r
- return checkCharRefNumber(result);\r
-}\r
-\r
-static int PTRCALL\r
-PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,\r
- const char *end)\r
-{\r
- switch ((end - ptr)/MINBPC(enc)) {\r
- case 2:\r
- if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {\r
- switch (BYTE_TO_ASCII(enc, ptr)) {\r
- case ASCII_l:\r
- return ASCII_LT;\r
- case ASCII_g:\r
- return ASCII_GT;\r
- }\r
- }\r
- break;\r
- case 3:\r
- if (CHAR_MATCHES(enc, ptr, ASCII_a)) {\r
- ptr += MINBPC(enc);\r
- if (CHAR_MATCHES(enc, ptr, ASCII_m)) {\r
- ptr += MINBPC(enc);\r
- if (CHAR_MATCHES(enc, ptr, ASCII_p))\r
- return ASCII_AMP;\r
- }\r
- }\r
- break;\r
- case 4:\r
- switch (BYTE_TO_ASCII(enc, ptr)) {\r
- case ASCII_q:\r
- ptr += MINBPC(enc);\r
- if (CHAR_MATCHES(enc, ptr, ASCII_u)) {\r
- ptr += MINBPC(enc);\r
- if (CHAR_MATCHES(enc, ptr, ASCII_o)) {\r
- ptr += MINBPC(enc);\r
- if (CHAR_MATCHES(enc, ptr, ASCII_t))\r
- return ASCII_QUOT;\r
- }\r
- }\r
- break;\r
- case ASCII_a:\r
- ptr += MINBPC(enc);\r
- if (CHAR_MATCHES(enc, ptr, ASCII_p)) {\r
- ptr += MINBPC(enc);\r
- if (CHAR_MATCHES(enc, ptr, ASCII_o)) {\r
- ptr += MINBPC(enc);\r
- if (CHAR_MATCHES(enc, ptr, ASCII_s))\r
- return ASCII_APOS;\r
- }\r
- }\r
- break;\r
- }\r
- }\r
- return 0;\r
-}\r
-\r
-static int PTRCALL\r
-PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)\r
-{\r
- for (;;) {\r
- switch (BYTE_TYPE(enc, ptr1)) {\r
-#define LEAD_CASE(n) \\r
- case BT_LEAD ## n: \\r
- if (*ptr1++ != *ptr2++) \\r
- return 0;\r
- LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)\r
-#undef LEAD_CASE\r
- /* fall through */\r
- if (*ptr1++ != *ptr2++)\r
- return 0;\r
- break;\r
- case BT_NONASCII:\r
- case BT_NMSTRT:\r
-#ifdef XML_NS\r
- case BT_COLON:\r
-#endif\r
- case BT_HEX:\r
- case BT_DIGIT:\r
- case BT_NAME:\r
- case BT_MINUS:\r
- if (*ptr2++ != *ptr1++)\r
- return 0;\r
- if (MINBPC(enc) > 1) {\r
- if (*ptr2++ != *ptr1++)\r
- return 0;\r
- if (MINBPC(enc) > 2) {\r
- if (*ptr2++ != *ptr1++)\r
- return 0;\r
- if (MINBPC(enc) > 3) {\r
- if (*ptr2++ != *ptr1++)\r
- return 0;\r
- }\r
- }\r
- }\r
- break;\r
- default:\r
- if (MINBPC(enc) == 1 && *ptr1 == *ptr2)\r
- return 1;\r
- switch (BYTE_TYPE(enc, ptr2)) {\r
- case BT_LEAD2:\r
- case BT_LEAD3:\r
- case BT_LEAD4:\r
- case BT_NONASCII:\r
- case BT_NMSTRT:\r
-#ifdef XML_NS\r
- case BT_COLON:\r
-#endif\r
- case BT_HEX:\r
- case BT_DIGIT:\r
- case BT_NAME:\r
- case BT_MINUS:\r
- return 0;\r
- default:\r
- return 1;\r
- }\r
- }\r
- }\r
- /* not reached */\r
-}\r
-\r
-static int PTRCALL\r
-PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,\r
- const char *end1, const char *ptr2)\r
-{\r
- for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {\r
- if (ptr1 == end1)\r
- return 0;\r
- if (!CHAR_MATCHES(enc, ptr1, *ptr2))\r
- return 0;\r
- }\r
- return ptr1 == end1;\r
-}\r
-\r
-static int PTRFASTCALL\r
-PREFIX(nameLength)(const ENCODING *enc, const char *ptr)\r
-{\r
- const char *start = ptr;\r
- for (;;) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
-#define LEAD_CASE(n) \\r
- case BT_LEAD ## n: ptr += n; break;\r
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)\r
-#undef LEAD_CASE\r
- case BT_NONASCII:\r
- case BT_NMSTRT:\r
-#ifdef XML_NS\r
- case BT_COLON:\r
-#endif\r
- case BT_HEX:\r
- case BT_DIGIT:\r
- case BT_NAME:\r
- case BT_MINUS:\r
- ptr += MINBPC(enc);\r
- break;\r
- default:\r
- return (int)(ptr - start);\r
- }\r
- }\r
-}\r
-\r
-static const char * PTRFASTCALL\r
-PREFIX(skipS)(const ENCODING *enc, const char *ptr)\r
-{\r
- for (;;) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
- case BT_LF:\r
- case BT_CR:\r
- case BT_S:\r
- ptr += MINBPC(enc);\r
- break;\r
- default:\r
- return ptr;\r
- }\r
- }\r
-}\r
-\r
-static void PTRCALL\r
-PREFIX(updatePosition)(const ENCODING *enc,\r
- const char *ptr,\r
- const char *end,\r
- POSITION *pos)\r
-{\r
- while (ptr < end) {\r
- switch (BYTE_TYPE(enc, ptr)) {\r
-#define LEAD_CASE(n) \\r
- case BT_LEAD ## n: \\r
- ptr += n; \\r
- break;\r
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)\r
-#undef LEAD_CASE\r
- case BT_LF:\r
- pos->columnNumber = (XML_Size)-1;\r
- pos->lineNumber++;\r
- ptr += MINBPC(enc);\r
- break;\r
- case BT_CR:\r
- pos->lineNumber++;\r
- ptr += MINBPC(enc);\r
- if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)\r
- ptr += MINBPC(enc);\r
- pos->columnNumber = (XML_Size)-1;\r
- break;\r
- default:\r
- ptr += MINBPC(enc);\r
- break;\r
- }\r
- pos->columnNumber++;\r
- }\r
-}\r
-\r
-#undef DO_LEAD_CASE\r
-#undef MULTIBYTE_CASES\r
-#undef INVALID_CASES\r
-#undef CHECK_NAME_CASE\r
-#undef CHECK_NAME_CASES\r
-#undef CHECK_NMSTRT_CASE\r
-#undef CHECK_NMSTRT_CASES\r
-\r
-#endif /* XML_TOK_IMPL_C */\r