]>
Commit | Line | Data |
---|---|---|
14b0e578 CS |
1 | #ifndef REGENC_H\r |
2 | #define REGENC_H\r | |
3 | /**********************************************************************\r | |
4 | regenc.h - Oniguruma (regular expression library)\r | |
5 | **********************************************************************/\r | |
6 | /*-\r | |
b26691c4 | 7 | * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r |
14b0e578 CS |
8 | * All rights reserved.\r |
9 | *\r | |
10 | * Redistribution and use in source and binary forms, with or without\r | |
11 | * modification, are permitted provided that the following conditions\r | |
12 | * are met:\r | |
13 | * 1. Redistributions of source code must retain the above copyright\r | |
14 | * notice, this list of conditions and the following disclaimer.\r | |
15 | * 2. Redistributions in binary form must reproduce the above copyright\r | |
16 | * notice, this list of conditions and the following disclaimer in the\r | |
17 | * documentation and/or other materials provided with the distribution.\r | |
18 | *\r | |
19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r | |
20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r | |
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r | |
22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r | |
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r | |
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r | |
25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r | |
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r | |
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r | |
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r | |
29 | * SUCH DAMAGE.\r | |
30 | */\r | |
31 | \r | |
b602265d DG |
/* Provide an empty ONIGURUMA_EXPORT unless the build already defined one. */
#if !defined(ONIGURUMA_EXPORT)
#define ONIGURUMA_EXPORT
#endif
35 | \r | |
b602265d DG |
36 | //#include "config.h"\r |
37 | //#include <stddef.h>\r | |
38 | \r | |
14b0e578 CS |
/*
 * Make sure ONIG_ESCAPE_UCHAR_COLLISION is not defined when oniguruma.h is
 * included further down.  #undef on a name that is not defined is a no-op,
 * so no #ifdef guard is needed.
 */
#undef ONIG_ESCAPE_UCHAR_COLLISION
42 | \r | |
43 | #include "oniguruma.h"\r | |
44 | \r | |
45 | typedef struct {\r | |
46 | OnigCodePoint from;\r | |
47 | OnigCodePoint to;\r | |
48 | } OnigPairCaseFoldCodes;\r | |
49 | \r | |
50 | \r | |
/* Fallback definitions, used only when nothing earlier defined these names. */
#if !defined(NULL)
#define NULL   ((void *)0)
#endif

#if !defined(TRUE)
#define TRUE   1
#endif

#if !defined(FALSE)
#define FALSE  0
#endif

/*
 * ARG_UNUSED: marker for a parameter that is deliberately unused.
 * Expands to the GCC "unused" attribute when __GNUC__ is defined and to
 * nothing otherwise.
 */
#if !defined(ARG_UNUSED)
#  ifdef __GNUC__
#    define ARG_UNUSED  __attribute__ ((unused))
#  else
#    define ARG_UNUSED
#  endif
#endif
70 | \r | |
/*
 * Null-pointer helpers.
 *
 * ONIG_IS_NULL(p) / ONIG_IS_NOT_NULL(p)
 *     Expressions that compare p (cast to void*) against a null pointer.
 *
 * ONIG_CHECK_NULL_RETURN(p)
 *     Statement: returns NULL from the enclosing function when p is null.
 *
 * ONIG_CHECK_NULL_RETURN_VAL(p, val)
 *     Statement: returns (val) from the enclosing function when p is null.
 *
 * The two CHECK macros are wrapped in do { ... } while (0) so that they
 * behave as a single statement.  A bare `if` expansion would capture the
 * `else` of a surrounding `if (...) ONIG_CHECK_NULL_RETURN(p); else ...`.
 * Use them with a trailing semicolon.
 */
#define ONIG_IS_NULL(p)                    (((void*)(p)) == (void*)0)
#define ONIG_IS_NOT_NULL(p)                (((void*)(p)) != (void*)0)
#define ONIG_CHECK_NULL_RETURN(p) \
  do { if (ONIG_IS_NULL(p)) return NULL; } while (0)
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) \
  do { if (ONIG_IS_NULL(p)) return (val); } while (0)
75 | \r | |
b602265d DG |
/* Bitwise complement of a zero of type OnigCodePoint. */
#define MAX_CODE_POINT   (~((OnigCodePoint)0))

/* Short alias for ONIGENC_MBC_ENC_LEN(enc, p). */
#define enclen(enc, p)   ONIGENC_MBC_ENC_LEN(enc, p)
14b0e578 CS |
79 | \r |
/* character types bit flag */

/* Bit mask for a character-type number: bit `ctype` set. */
#define CTYPE_TO_BIT(ctype)  (1<<(ctype))

/* One mask per ONIGENC_CTYPE_* constant. */
#define BIT_CTYPE_NEWLINE    CTYPE_TO_BIT(ONIGENC_CTYPE_NEWLINE)
#define BIT_CTYPE_ALPHA      CTYPE_TO_BIT(ONIGENC_CTYPE_ALPHA)
#define BIT_CTYPE_BLANK      CTYPE_TO_BIT(ONIGENC_CTYPE_BLANK)
#define BIT_CTYPE_CNTRL      CTYPE_TO_BIT(ONIGENC_CTYPE_CNTRL)
#define BIT_CTYPE_DIGIT      CTYPE_TO_BIT(ONIGENC_CTYPE_DIGIT)
#define BIT_CTYPE_GRAPH      CTYPE_TO_BIT(ONIGENC_CTYPE_GRAPH)
#define BIT_CTYPE_LOWER      CTYPE_TO_BIT(ONIGENC_CTYPE_LOWER)
#define BIT_CTYPE_PRINT      CTYPE_TO_BIT(ONIGENC_CTYPE_PRINT)
#define BIT_CTYPE_PUNCT      CTYPE_TO_BIT(ONIGENC_CTYPE_PUNCT)
#define BIT_CTYPE_SPACE      CTYPE_TO_BIT(ONIGENC_CTYPE_SPACE)
#define BIT_CTYPE_UPPER      CTYPE_TO_BIT(ONIGENC_CTYPE_UPPER)
#define BIT_CTYPE_XDIGIT     CTYPE_TO_BIT(ONIGENC_CTYPE_XDIGIT)
#define BIT_CTYPE_WORD       CTYPE_TO_BIT(ONIGENC_CTYPE_WORD)
#define BIT_CTYPE_ALNUM      CTYPE_TO_BIT(ONIGENC_CTYPE_ALNUM)
#define BIT_CTYPE_ASCII      CTYPE_TO_BIT(ONIGENC_CTYPE_ASCII)

/* True when ctype is one of ONIGENC_CTYPE_WORD, _GRAPH or _PRINT. */
#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \
  ((ctype) == ONIGENC_CTYPE_WORD  || \
   (ctype) == ONIGENC_CTYPE_GRAPH || \
   (ctype) == ONIGENC_CTYPE_PRINT)
101 | \r | |
102 | \r | |
103 | typedef struct {\r | |
104 | UChar *name;\r | |
105 | int ctype;\r | |
106 | short int len;\r | |
107 | } PosixBracketEntryType;\r | |
108 | \r | |
b602265d DG |
/*
 * Name / character-type pair.  Pointers to this struct are returned by
 * onigenc_euc_jp_lookup_property_name() and
 * onigenc_sjis_lookup_property_name().
 */
struct PropertyNameCtype {
  char *name;    /* property name       */
  int   ctype;   /* character-type code */
};
14b0e578 CS |
113 | \r |
/*
 * Build-time feature switches.
 *
 * Enabled:
 */
#define USE_UNICODE_PROPERTIES
#define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
#define USE_UNICODE_WORD_BREAK   /* gates onigenc_wb_is_break_position() below */

/*
 * Disabled (uncomment to enable):
 */
/* #define USE_CRNL_AS_LINE_TERMINATOR */
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */  /* see Unicode.org UTS #18 */


/* Encoding named as the initial default. */
#define ONIG_ENCODING_INIT_DEFAULT   ONIG_ENCODING_ASCII
123 | \r | |
b602265d | 124 | \r |
b26691c4 LG |
/*
 * Encoding `flag` bit layout:
 *   bit 0     ENC_FLAG_ASCII_COMPATIBLE
 *   bit 1     ENC_FLAG_UNICODE
 *   bits 2-4  skip-offset field (ENC_FLAG_SKIP_OFFSET_MASK)
 */

/* Skip-offset field value used by ENC_FLAG_SKIP_OFFSET_1_OR_0. */
#define ENC_SKIP_OFFSET_1_OR_0             7

#define ENC_FLAG_ASCII_COMPATIBLE          (1 << 0)
#define ENC_FLAG_UNICODE                   (1 << 1)

#define ENC_FLAG_SKIP_OFFSET_MASK          (7 << 2)
#define ENC_FLAG_SKIP_OFFSET_0             0
#define ENC_FLAG_SKIP_OFFSET_1             (1 << 2)
#define ENC_FLAG_SKIP_OFFSET_2             (2 << 2)
#define ENC_FLAG_SKIP_OFFSET_3             (3 << 2)
#define ENC_FLAG_SKIP_OFFSET_4             (4 << 2)
#define ENC_FLAG_SKIP_OFFSET_1_OR_0        (ENC_SKIP_OFFSET_1_OR_0 << 2)

/* Extract the skip-offset field from (enc)->flag as a small integer. */
#define ENC_GET_SKIP_OFFSET(enc) \
  (((enc)->flag & ENC_FLAG_SKIP_OFFSET_MASK) >> 2)
b602265d DG |
139 | \r |
140 | \r | |
14b0e578 | 141 | /* for encoding system implementation (internal) */\r |
b602265d DG |
142 | extern int onigenc_end(void);\r |
143 | extern int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));\r | |
144 | extern int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));\r | |
145 | extern int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));\r | |
146 | extern int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));\r | |
147 | extern int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]));\r | |
148 | extern int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));\r | |
14b0e578 CS |
149 | \r |
150 | \r | |
151 | /* methods for single byte encoding */\r | |
b602265d DG |
152 | extern int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));\r |
153 | extern int onigenc_single_byte_mbc_enc_len P_((const UChar* p));\r | |
154 | extern OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));\r | |
155 | extern int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));\r | |
156 | extern int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));\r | |
157 | extern UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));\r | |
158 | extern int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));\r | |
159 | extern int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));\r | |
160 | extern int onigenc_always_true_is_valid_mbc_string P_((const UChar* s, const UChar* end));\r | |
161 | extern int onigenc_length_check_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end));\r | |
14b0e578 CS |
162 | \r |
163 | /* methods for multi byte encoding */\r | |
b602265d DG |
164 | extern OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));\r |
165 | extern int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));\r | |
166 | extern int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));\r | |
167 | extern int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));\r | |
168 | extern int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));\r | |
169 | extern int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));\r | |
170 | extern int onigenc_is_mbc_word_ascii P_((OnigEncoding enc, UChar* s, const UChar* end));\r | |
171 | extern int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));\r | |
172 | extern int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));\r | |
173 | extern int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));\r | |
174 | extern int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));\r | |
175 | extern struct PropertyNameCtype* onigenc_euc_jp_lookup_property_name P_((register const char *str, register size_t len));\r | |
176 | extern struct PropertyNameCtype* onigenc_sjis_lookup_property_name P_((register const char *str, register size_t len));\r | |
14b0e578 | 177 | \r |
b26691c4 | 178 | /* in unicode.c */\r |
b602265d DG |
179 | extern int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));\r |
180 | extern int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));\r | |
181 | extern int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[]));\r | |
182 | extern int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));\r | |
183 | extern int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));\r | |
184 | extern int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));\r | |
b26691c4 | 185 | \r |
b602265d | 186 | extern int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end));\r |
14b0e578 | 187 | \r |
b26691c4 LG |
188 | #ifdef USE_UNICODE_WORD_BREAK\r |
189 | extern int onigenc_wb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end));\r | |
190 | #endif\r | |
14b0e578 CS |
191 | \r |
/*
 * Surrogate tests on a single byte value c.  Only bits 2-7 of c are
 * examined (mask 0xfc):
 *   FIRST  is true for 0xd8..0xdb,
 *   SECOND is true for 0xdc..0xdf.
 * NOTE(review): c is presumably the high-order byte of a UTF-16 code unit;
 * confirm at the call sites.
 */
#define UTF16_IS_SURROGATE_FIRST(c)     (0xd8 == ((c) & 0xfc))
#define UTF16_IS_SURROGATE_SECOND(c)    (0xdc == ((c) & 0xfc))
194 | \r | |
b602265d DG |
/* from unicode generated codes */

/*
 * Accessors for the OnigUnicodeFolds1/2/3 tables.  For table N and entry
 * start index i the macros address:
 *   FOLDSn_FOLD(i)         table + i          (start of the entry)
 *   FOLDSn_UNFOLDS_NUM(i)  table[i + N]       (a count)
 *   FOLDSn_UNFOLDS(i)      table + i + N + 1  (first element after the count)
 *   FOLDSn_NEXT_INDEX(i)   i + N + 1 + count  (index just past the entry)
 */
#define FOLDS1_FOLD(i)          (&OnigUnicodeFolds1[(i)])
#define FOLDS2_FOLD(i)          (&OnigUnicodeFolds2[(i)])
#define FOLDS3_FOLD(i)          (&OnigUnicodeFolds3[(i)])

#define FOLDS1_UNFOLDS_NUM(i)   (OnigUnicodeFolds1[(i)+1])
#define FOLDS2_UNFOLDS_NUM(i)   (OnigUnicodeFolds2[(i)+2])
#define FOLDS3_UNFOLDS_NUM(i)   (OnigUnicodeFolds3[(i)+3])

#define FOLDS1_UNFOLDS(i)       (OnigUnicodeFolds1 + (i) + 2)
#define FOLDS2_UNFOLDS(i)       (OnigUnicodeFolds2 + (i) + 3)
#define FOLDS3_UNFOLDS(i)       (OnigUnicodeFolds3 + (i) + 4)

#define FOLDS1_NEXT_INDEX(i)    ((i) + 2 + FOLDS1_UNFOLDS_NUM(i))
#define FOLDS2_NEXT_INDEX(i)    ((i) + 3 + FOLDS2_UNFOLDS_NUM(i))
#define FOLDS3_NEXT_INDEX(i)    ((i) + 4 + FOLDS3_UNFOLDS_NUM(i))
208 | \r | |
/*
 * FOLDS_FOLD_ADDR_BUK(buk, addr)
 *
 * Assigns to `addr` the address of element (buk)->index in the table chosen
 * by (buk)->fold_len: 1 -> OnigUnicodeFolds1, 2 -> OnigUnicodeFolds2,
 * 3 -> OnigUnicodeFolds3.
 *
 * WARNING: for any other fold_len this macro executes
 * `return ONIGERR_INVALID_CODE_POINT_VALUE;` in the *calling* function, so
 * it may only be used inside functions whose return type accepts that value.
 * `buk` is evaluated more than once.
 */
#define FOLDS_FOLD_ADDR_BUK(buk, addr) do {\
  if ((buk)->fold_len == 1) {\
    addr = OnigUnicodeFolds1 + (buk)->index;\
  }\
  else if ((buk)->fold_len == 2) {\
    addr = OnigUnicodeFolds2 + (buk)->index;\
  }\
  else if ((buk)->fold_len == 3) {\
    addr = OnigUnicodeFolds3 + (buk)->index;\
  }\
  else {\
    return ONIGERR_INVALID_CODE_POINT_VALUE;\
  }\
} while (0)
219 | \r | |
220 | extern OnigCodePoint OnigUnicodeFolds1[];\r | |
221 | extern OnigCodePoint OnigUnicodeFolds2[];\r | |
222 | extern OnigCodePoint OnigUnicodeFolds3[];\r | |
223 | \r | |
224 | struct ByUnfoldKey {\r | |
225 | OnigCodePoint code;\r | |
226 | short int index;\r | |
227 | short int fold_len;\r | |
228 | };\r | |
229 | \r | |
230 | extern const struct ByUnfoldKey* onigenc_unicode_unfold_key(OnigCodePoint code);\r | |
231 | extern int onigenc_unicode_fold1_key(OnigCodePoint code[]);\r | |
232 | extern int onigenc_unicode_fold2_key(OnigCodePoint code[]);\r | |
233 | extern int onigenc_unicode_fold3_key(OnigCodePoint code[]);\r | |
234 | \r | |
235 | extern int onig_codes_cmp(OnigCodePoint a[], OnigCodePoint b[], int n);\r | |
236 | extern int onig_codes_byte_at(OnigCodePoint code[], int at);\r | |
237 | \r | |
238 | \r | |
239 | \r | |
14b0e578 CS |
/*
 * Table lookups: index OnigEncISO_8859_1_ToLowerCaseTable /
 * OnigEncISO_8859_1_ToUpperCaseTable with c.  No range check is performed
 * on c.
 */
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c)  OnigEncISO_8859_1_ToLowerCaseTable[(c)]
#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c)  OnigEncISO_8859_1_ToUpperCaseTable[(c)]
244 | \r | |
b602265d DG |
245 | extern const UChar OnigEncISO_8859_1_ToLowerCaseTable[];\r |
246 | extern const UChar OnigEncISO_8859_1_ToUpperCaseTable[];\r | |
14b0e578 | 247 | \r |
b602265d | 248 | extern int\r |
14b0e578 | 249 | onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));\r |
b602265d | 250 | extern UChar*\r |
14b0e578 CS |
251 | onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));\r |
252 | \r | |
253 | /* defined in regexec.c, but used in enc/xxx.c */\r | |
254 | extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));\r | |
255 | \r | |
b602265d DG |
256 | extern OnigEncoding OnigEncDefaultCharEncoding;\r |
257 | extern const UChar OnigEncAsciiToLowerCaseTable[];\r | |
258 | extern const UChar OnigEncAsciiToUpperCaseTable[];\r | |
259 | extern const unsigned short OnigEncAsciiCtypeTable[];\r | |
260 | \r | |
14b0e578 CS |
261 | \r |
/* True when code is below 0x80. */
#define ONIGENC_IS_ASCII_CODE(code)           ((code) < 0x80)

/* Table lookups; no range check is performed on c. */
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)   OnigEncAsciiToLowerCaseTable[c]
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c)   OnigEncAsciiToUpperCaseTable[c]

/* True when the ctype bit is set in OnigEncAsciiCtypeTable[code]. */
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
  ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)

/* Shorthand for the ONIGENC_CTYPE_WORD test. */
#define ONIGENC_IS_ASCII_CODE_WORD(code) \
  ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_WORD)

/* True when code has the UPPER or the LOWER ctype bit set. */
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
  (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) || \
   ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
b26691c4 | 272 | \r |
b602265d DG |
/* Flag tests on (enc)->flag; each yields 1 when the bit is set, else 0. */
#define ONIGENC_IS_UNICODE_ENCODING(enc)           (((enc)->flag & ENC_FLAG_UNICODE) != 0)
#define ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)  (((enc)->flag & ENC_FLAG_ASCII_COMPATIBLE) != 0)
14b0e578 CS |
278 | \r |
279 | #endif /* REGENC_H */\r |