]>
Commit | Line | Data |
---|---|---|
14b0e578 CS |
1 | /**********************************************************************\r |
2 | regposix.c - Oniguruma (regular expression library)\r | |
3 | **********************************************************************/\r | |
4 | /*-\r | |
5 | * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r | |
6 | * All rights reserved.\r | |
7 | *\r | |
0af8e57c | 8 | * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>\r |
14b0e578 CS |
9 | *\r |
10 | * Redistribution and use in source and binary forms, with or without\r | |
11 | * modification, are permitted provided that the following conditions\r | |
12 | * are met:\r | |
13 | * 1. Redistributions of source code must retain the above copyright\r | |
14 | * notice, this list of conditions and the following disclaimer.\r | |
15 | * 2. Redistributions in binary form must reproduce the above copyright\r | |
16 | * notice, this list of conditions and the following disclaimer in the\r | |
17 | * documentation and/or other materials provided with the distribution.\r | |
18 | *\r | |
19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r | |
20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r | |
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r | |
22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r | |
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r | |
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r | |
25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r | |
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r | |
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r | |
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r | |
29 | * SUCH DAMAGE.\r | |
30 | */\r | |
31 | \r | |
32 | #define regex_t onig_regex_t\r | |
33 | #include "regint.h"\r | |
34 | #undef regex_t\r | |
35 | #include "onigposix.h"\r | |
36 | \r | |
/*
 * Accessors for the compiled Oniguruma regex held in the `onig` member of a
 * POSIX-style regex object (`r` is a pointer to that object).
 *
 *   ONIG_C(r)   yields the stored pointer, viewed as onig_regex_t*.
 *   PONIG_C(r)  yields the address of the `onig` member, viewed as
 *               onig_regex_t**, for calls that write the pointer back.
 */
#define ONIG_C(r)    ((onig_regex_t *)((r)->onig))
#define PONIG_C(r)   ((onig_regex_t **)(&(r)->onig))
39 | \r | |
/*
 * ENC_STRING_LEN(enc, s, len)
 *
 * Stores in `len` the byte length of the terminated string `s`, not counting
 * the terminator.
 *
 * When the minimum character length of `enc` is one byte, the string is
 * scanned for the first zero byte (the strlen() equivalent).  For any other
 * encoding the measurement is delegated to onigenc_str_bytelen_null().
 *
 * `s` may be an arbitrary pointer expression: it is parenthesized wherever
 * it is cast, so the UChar* cast applies to the whole expression, and the
 * single-byte branch evaluates it only once.  `enc` may be expanded twice,
 * so it should be free of side effects.
 */
#define ENC_STRING_LEN(enc,s,len) do { \
  if (ONIGENC_MBC_MINLEN(enc) == 1) { \
    UChar* enc_str_head_ = (UChar* )(s); \
    UChar* enc_str_tail_ = enc_str_head_; \
    while (*enc_str_tail_ != 0) enc_str_tail_++; \
    (len) = (int )(enc_str_tail_ - enc_str_head_); \
  } \
  else { \
    (len) = onigenc_str_bytelen_null(enc, (UChar* )(s)); \
  } \
} while(0)
51 | \r | |
/* One row of the translation table from Oniguruma codes to POSIX codes. */
typedef struct {
  int onig_err;   /* Oniguruma-side code (lookup key) */
  int posix_err;  /* POSIX-side code reported for that key */
} O2PERR;
56 | \r | |
57 | static int\r | |
58 | onig2posix_error_code(int code)\r | |
59 | {\r | |
60 | static const O2PERR o2p[] = {\r | |
61 | { ONIG_MISMATCH, REG_NOMATCH },\r | |
62 | { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },\r | |
63 | { ONIGERR_MEMORY, REG_ESPACE },\r | |
64 | { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },\r | |
65 | { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },\r | |
66 | { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },\r | |
67 | { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },\r | |
68 | { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },\r | |
69 | { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },\r | |
70 | { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG },\r | |
71 | { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },\r | |
72 | { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },\r | |
73 | { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },\r | |
74 | { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },\r | |
75 | { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },\r | |
76 | { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },\r | |
77 | { ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE },\r | |
78 | { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },\r | |
79 | { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },\r | |
80 | { ONIGERR_META_CODE_SYNTAX, REG_BADPAT },\r | |
81 | { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },\r | |
82 | { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },\r | |
83 | { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },\r | |
84 | { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },\r | |
85 | { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },\r | |
86 | { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },\r | |
87 | { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },\r | |
88 | { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },\r | |
89 | { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },\r | |
90 | { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT },\r | |
91 | { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },\r | |
92 | { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },\r | |
93 | { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },\r | |
94 | { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },\r | |
95 | { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT },\r | |
96 | { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },\r | |
97 | { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },\r | |
98 | { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },\r | |
99 | { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },\r | |
100 | { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },\r | |
101 | { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },\r | |
102 | { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },\r | |
103 | { ONIGERR_INVALID_BACKREF, REG_ESUBREG },\r | |
104 | { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT },\r | |
105 | { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },\r | |
106 | { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },\r | |
107 | { ONIGERR_INVALID_CODE_POINT_VALUE, REG_EONIG_BADWC },\r | |
108 | { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT },\r | |
109 | { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT },\r | |
110 | { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT },\r | |
111 | { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },\r | |
112 | { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },\r | |
113 | { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT },\r | |
114 | { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },\r | |
115 | { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },\r | |
116 | { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },\r | |
117 | { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },\r | |
118 | { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },\r | |
119 | { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }\r | |
120 | \r | |
121 | };\r | |
122 | \r | |
123 | int i;\r | |
124 | \r | |
125 | if (code >= 0) return 0;\r | |
126 | \r | |
127 | for (i = 0; i < (int )(sizeof(o2p) / sizeof(o2p[0])); i++) {\r | |
128 | if (code == o2p[i].onig_err)\r | |
129 | return o2p[i].posix_err;\r | |
130 | }\r | |
131 | \r | |
132 | return REG_EONIG_INTERNAL; /* but, unknown error code */\r | |
133 | }\r | |
134 | \r | |
135 | extern int\r | |
136 | regcomp(regex_t* reg, const char* pattern, int posix_options)\r | |
137 | {\r | |
138 | int r, len;\r | |
139 | OnigSyntaxType* syntax = OnigDefaultSyntax;\r | |
140 | OnigOptionType options;\r | |
141 | \r | |
142 | if ((posix_options & REG_EXTENDED) == 0)\r | |
143 | syntax = ONIG_SYNTAX_POSIX_BASIC;\r | |
144 | \r | |
145 | options = syntax->options;\r | |
146 | if ((posix_options & REG_ICASE) != 0)\r | |
147 | ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);\r | |
148 | if ((posix_options & REG_NEWLINE) != 0) {\r | |
149 | ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);\r | |
150 | ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);\r | |
151 | }\r | |
152 | \r | |
153 | reg->comp_options = posix_options;\r | |
154 | \r | |
155 | ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);\r | |
156 | r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),\r | |
157 | options, OnigEncDefaultCharEncoding, syntax,\r | |
158 | (OnigErrorInfo* )NULL);\r | |
159 | if (r != ONIG_NORMAL) {\r | |
160 | return onig2posix_error_code(r);\r | |
161 | }\r | |
162 | \r | |
163 | reg->re_nsub = ONIG_C(reg)->num_mem;\r | |
164 | return 0;\r | |
165 | }\r | |
166 | \r | |
167 | extern int\r | |
168 | regexec(regex_t* reg, const char* str, size_t nmatch,\r | |
169 | regmatch_t pmatch[], int posix_options)\r | |
170 | {\r | |
171 | int r, i, len;\r | |
172 | UChar* end;\r | |
173 | regmatch_t* pm;\r | |
174 | OnigOptionType options;\r | |
175 | \r | |
176 | options = ONIG_OPTION_POSIX_REGION;\r | |
177 | if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;\r | |
178 | if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;\r | |
179 | \r | |
180 | if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) {\r | |
181 | pm = (regmatch_t* )NULL;\r | |
182 | nmatch = 0;\r | |
183 | }\r | |
184 | else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) {\r | |
185 | pm = (regmatch_t* )xmalloc(sizeof(regmatch_t)\r | |
186 | * (ONIG_C(reg)->num_mem + 1));\r | |
187 | if (pm == NULL)\r | |
188 | return REG_ESPACE;\r | |
189 | }\r | |
190 | else {\r | |
191 | pm = pmatch;\r | |
192 | }\r | |
193 | \r | |
194 | ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);\r | |
195 | end = (UChar* )(str + len);\r | |
196 | r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,\r | |
197 | (OnigRegion* )pm, options);\r | |
198 | \r | |
199 | if (r >= 0) {\r | |
200 | r = 0; /* Match */\r | |
201 | if (pm != pmatch && pm != NULL) {\r | |
202 | xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch);\r | |
203 | }\r | |
204 | }\r | |
205 | else if (r == ONIG_MISMATCH) {\r | |
206 | r = REG_NOMATCH;\r | |
207 | for (i = 0; i < (int )nmatch; i++)\r | |
208 | pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;\r | |
209 | }\r | |
210 | else {\r | |
211 | r = onig2posix_error_code(r);\r | |
212 | }\r | |
213 | \r | |
214 | if (pm != pmatch && pm != NULL)\r | |
215 | xfree(pm);\r | |
216 | \r | |
217 | #if 0\r | |
218 | if (reg->re_nsub > nmatch - 1)\r | |
219 | reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);\r | |
220 | #endif\r | |
221 | \r | |
222 | return r;\r | |
223 | }\r | |
224 | \r | |
225 | extern void\r | |
226 | regfree(regex_t* reg)\r | |
227 | {\r | |
228 | onig_free(ONIG_C(reg));\r | |
229 | }\r | |
230 | \r | |
231 | \r | |
232 | extern void\r | |
233 | reg_set_encoding(int mb_code)\r | |
234 | {\r | |
235 | OnigEncoding enc;\r | |
236 | \r | |
237 | switch (mb_code) {\r | |
238 | case REG_POSIX_ENCODING_ASCII:\r | |
239 | enc = ONIG_ENCODING_ASCII;\r | |
240 | break;\r | |
14b0e578 CS |
241 | case REG_POSIX_ENCODING_UTF16_LE:\r |
242 | enc = ONIG_ENCODING_UTF16_LE;\r | |
243 | break;\r | |
244 | \r | |
245 | default:\r | |
39bc77eb | 246 | return;\r |
14b0e578 CS |
247 | }\r |
248 | \r | |
249 | onigenc_set_default_encoding(enc);\r | |
250 | }\r | |
251 | \r | |
252 | extern int\r | |
253 | reg_name_to_group_numbers(regex_t* reg,\r | |
254 | const unsigned char* name, const unsigned char* name_end, int** nums)\r | |
255 | {\r | |
256 | return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);\r | |
257 | }\r | |
258 | \r | |
259 | typedef struct {\r | |
260 | int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);\r | |
261 | regex_t* reg;\r | |
262 | void* arg;\r | |
263 | } i_wrap;\r | |
264 | \r | |
265 | static int\r | |
266 | i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,\r | |
267 | onig_regex_t* reg ARG_UNUSED, void* arg)\r | |
268 | {\r | |
269 | i_wrap* warg = (i_wrap* )arg;\r | |
270 | \r | |
271 | return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg);\r | |
272 | }\r | |
273 | \r | |
274 | extern int\r | |
275 | reg_foreach_name(regex_t* reg,\r | |
276 | int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),\r | |
277 | void* arg)\r | |
278 | {\r | |
279 | i_wrap warg;\r | |
280 | \r | |
281 | warg.func = func;\r | |
282 | warg.reg = reg;\r | |
283 | warg.arg = arg;\r | |
284 | \r | |
285 | return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg);\r | |
286 | }\r | |
287 | \r | |
288 | extern int\r | |
289 | reg_number_of_names(regex_t* reg)\r | |
290 | {\r | |
291 | return onig_number_of_names(ONIG_C(reg));\r | |
292 | }\r |