/**********************************************************************
  regposix.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
30 #define regex_t onig_regex_t
33 #include "onigposix.h"
/* The Oniguruma regex object held in the `onig` slot of a POSIX regex_t. */
#define ONIG_C(reg)    ((onig_regex_t* )((reg)->onig))
/* Address of that slot, typed so it can be handed to onig_new(). */
#define PONIG_C(reg)   ((onig_regex_t** )(&(reg)->onig))

/*
 * ENC_STRING_LEN(enc, s, len)
 *
 * Stores in `len` (an int lvalue) the byte length of the terminated
 * string `s` under encoding `enc`.
 *
 *  - When the encoding's minimum character length is one byte, the string
 *    is scanned for the first zero byte.
 *  - Otherwise the length is obtained from onigenc_str_bytelen_null().
 *
 * `s` is evaluated more than once, so it must be free of side effects.
 * A plain strlen() is not used because it would be wrong for encodings
 * whose characters are wider than one byte.
 */
#define ENC_STRING_LEN(enc,s,len) do { \
  if (ONIGENC_MBC_MINLEN(enc) == 1) { \
    UChar* tmps = (UChar* )(s); \
    while (*tmps != 0) tmps++; \
    (len) = (int )(tmps - (UChar* )(s)); \
  } \
  else { \
    (len) = onigenc_str_bytelen_null((enc), (UChar* )(s)); \
  } \
} while(0)
56 onig2posix_error_code(int code
)
58 static const O2PERR o2p
[] = {
59 { ONIG_MISMATCH
, REG_NOMATCH
},
60 { ONIG_NO_SUPPORT_CONFIG
, REG_EONIG_INTERNAL
},
61 { ONIG_ABORT
, REG_EONIG_INTERNAL
},
62 { ONIGERR_MEMORY
, REG_ESPACE
},
63 { ONIGERR_MATCH_STACK_LIMIT_OVER
, REG_EONIG_INTERNAL
},
64 { ONIGERR_RETRY_LIMIT_IN_MATCH_OVER
, REG_EONIG_INTERNAL
},
65 { ONIGERR_TYPE_BUG
, REG_EONIG_INTERNAL
},
66 { ONIGERR_PARSER_BUG
, REG_EONIG_INTERNAL
},
67 { ONIGERR_STACK_BUG
, REG_EONIG_INTERNAL
},
68 { ONIGERR_UNDEFINED_BYTECODE
, REG_EONIG_INTERNAL
},
69 { ONIGERR_UNEXPECTED_BYTECODE
, REG_EONIG_INTERNAL
},
70 { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED
, REG_EONIG_BADARG
},
71 { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR
, REG_EONIG_BADARG
},
72 { ONIGERR_FAIL_TO_INITIALIZE
, REG_EONIG_INTERNAL
},
73 { ONIGERR_INVALID_ARGUMENT
, REG_EONIG_BADARG
},
74 { ONIGERR_END_PATTERN_AT_LEFT_BRACE
, REG_EBRACE
},
75 { ONIGERR_END_PATTERN_AT_LEFT_BRACKET
, REG_EBRACK
},
76 { ONIGERR_EMPTY_CHAR_CLASS
, REG_ECTYPE
},
77 { ONIGERR_PREMATURE_END_OF_CHAR_CLASS
, REG_ECTYPE
},
78 { ONIGERR_END_PATTERN_AT_ESCAPE
, REG_EESCAPE
},
79 { ONIGERR_END_PATTERN_AT_META
, REG_EESCAPE
},
80 { ONIGERR_END_PATTERN_AT_CONTROL
, REG_EESCAPE
},
81 { ONIGERR_META_CODE_SYNTAX
, REG_BADPAT
},
82 { ONIGERR_CONTROL_CODE_SYNTAX
, REG_BADPAT
},
83 { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE
, REG_ECTYPE
},
84 { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE
, REG_ECTYPE
},
85 { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS
, REG_ECTYPE
},
86 { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED
, REG_BADRPT
},
87 { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID
, REG_BADRPT
},
88 { ONIGERR_NESTED_REPEAT_OPERATOR
, REG_BADRPT
},
89 { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS
, REG_EPAREN
},
90 { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS
, REG_EPAREN
},
91 { ONIGERR_END_PATTERN_IN_GROUP
, REG_BADPAT
},
92 { ONIGERR_UNDEFINED_GROUP_OPTION
, REG_BADPAT
},
93 { ONIGERR_INVALID_POSIX_BRACKET_TYPE
, REG_BADPAT
},
94 { ONIGERR_INVALID_LOOK_BEHIND_PATTERN
, REG_BADPAT
},
95 { ONIGERR_INVALID_REPEAT_RANGE_PATTERN
, REG_BADPAT
},
96 { ONIGERR_TOO_BIG_NUMBER
, REG_BADPAT
},
97 { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE
, REG_BADBR
},
98 { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE
, REG_BADBR
},
99 { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS
, REG_ECTYPE
},
100 { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE
, REG_ECTYPE
},
101 { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES
, REG_ECTYPE
},
102 { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING
, REG_BADPAT
},
103 { ONIGERR_TOO_BIG_BACKREF_NUMBER
, REG_ESUBREG
},
104 { ONIGERR_INVALID_BACKREF
, REG_ESUBREG
},
105 { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
, REG_BADPAT
},
106 { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
, REG_EONIG_BADWC
},
107 { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE
, REG_EONIG_BADWC
},
108 { ONIGERR_INVALID_CODE_POINT_VALUE
, REG_EONIG_BADWC
},
109 { ONIGERR_EMPTY_GROUP_NAME
, REG_BADPAT
},
110 { ONIGERR_INVALID_GROUP_NAME
, REG_BADPAT
},
111 { ONIGERR_INVALID_CHAR_IN_GROUP_NAME
, REG_BADPAT
},
112 { ONIGERR_UNDEFINED_NAME_REFERENCE
, REG_BADPAT
},
113 { ONIGERR_UNDEFINED_GROUP_REFERENCE
, REG_BADPAT
},
114 { ONIGERR_MULTIPLEX_DEFINED_NAME
, REG_BADPAT
},
115 { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
, REG_BADPAT
},
116 { ONIGERR_NEVER_ENDING_RECURSION
, REG_BADPAT
},
117 { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY
, REG_BADPAT
},
118 { ONIGERR_INVALID_CHAR_PROPERTY_NAME
, REG_BADPAT
},
119 { ONIGERR_INVALID_IF_ELSE_SYNTAX
, REG_BADPAT
},
120 { ONIGERR_INVALID_ABSENT_GROUP_PATTERN
, REG_BADPAT
},
121 { ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN
, REG_BADPAT
},
122 { ONIGERR_INVALID_CALLOUT_PATTERN
, REG_BADPAT
},
123 { ONIGERR_INVALID_CALLOUT_NAME
, REG_BADPAT
},
124 { ONIGERR_UNDEFINED_CALLOUT_NAME
, REG_BADPAT
},
125 { ONIGERR_INVALID_CALLOUT_BODY
, REG_BADPAT
},
126 { ONIGERR_INVALID_CALLOUT_TAG_NAME
, REG_BADPAT
},
127 { ONIGERR_INVALID_CALLOUT_ARG
, REG_BADPAT
},
128 { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION
, REG_EONIG_BADARG
},
129 { ONIGERR_LIBRARY_IS_NOT_INITIALIZED
, REG_EONIG_INTERNAL
}
134 if (code
>= 0) return 0;
136 for (i
= 0; i
< (int )(sizeof(o2p
) / sizeof(o2p
[0])); i
++) {
137 if (code
== o2p
[i
].onig_err
)
138 return o2p
[i
].posix_err
;
141 return REG_EONIG_INTERNAL
; /* but, unknown error code */
145 regcomp(regex_t
* reg
, const char* pattern
, int posix_options
)
148 OnigSyntaxType
* syntax
= OnigDefaultSyntax
;
149 OnigOptionType options
;
151 if ((posix_options
& REG_EXTENDED
) == 0)
152 syntax
= ONIG_SYNTAX_POSIX_BASIC
;
154 options
= syntax
->options
;
155 if ((posix_options
& REG_ICASE
) != 0)
156 ONIG_OPTION_ON(options
, ONIG_OPTION_IGNORECASE
);
157 if ((posix_options
& REG_NEWLINE
) != 0) {
158 ONIG_OPTION_ON( options
, ONIG_OPTION_NEGATE_SINGLELINE
);
159 ONIG_OPTION_OFF(options
, ONIG_OPTION_SINGLELINE
);
162 reg
->comp_options
= posix_options
;
164 ENC_STRING_LEN(OnigEncDefaultCharEncoding
, pattern
, len
);
165 r
= onig_new(PONIG_C(reg
), (UChar
* )pattern
, (UChar
* )(pattern
+ len
),
166 options
, OnigEncDefaultCharEncoding
, syntax
,
167 (OnigErrorInfo
* )NULL
);
168 if (r
!= ONIG_NORMAL
) {
169 return onig2posix_error_code(r
);
172 reg
->re_nsub
= ONIG_C(reg
)->num_mem
;
177 regexec(regex_t
* reg
, const char* str
, size_t nmatch
,
178 regmatch_t pmatch
[], int posix_options
)
183 OnigOptionType options
;
185 options
= ONIG_OPTION_POSIX_REGION
;
186 if ((posix_options
& REG_NOTBOL
) != 0) options
|= ONIG_OPTION_NOTBOL
;
187 if ((posix_options
& REG_NOTEOL
) != 0) options
|= ONIG_OPTION_NOTEOL
;
189 if (nmatch
== 0 || (reg
->comp_options
& REG_NOSUB
) != 0) {
190 pm
= (regmatch_t
* )NULL
;
193 else if ((int )nmatch
< ONIG_C(reg
)->num_mem
+ 1) {
194 pm
= (regmatch_t
* )xmalloc(sizeof(regmatch_t
)
195 * (ONIG_C(reg
)->num_mem
+ 1));
203 ENC_STRING_LEN(ONIG_C(reg
)->enc
, str
, len
);
204 end
= (UChar
* )(str
+ len
);
205 r
= onig_search(ONIG_C(reg
), (UChar
* )str
, end
, (UChar
* )str
, end
,
206 (OnigRegion
* )pm
, options
);
210 if (pm
!= pmatch
&& pm
!= NULL
) {
211 xmemcpy(pmatch
, pm
, sizeof(regmatch_t
) * nmatch
);
214 else if (r
== ONIG_MISMATCH
) {
216 for (i
= 0; i
< (int )nmatch
; i
++)
217 pmatch
[i
].rm_so
= pmatch
[i
].rm_eo
= ONIG_REGION_NOTPOS
;
220 r
= onig2posix_error_code(r
);
223 if (pm
!= pmatch
&& pm
!= NULL
)
227 if (reg
->re_nsub
> nmatch
- 1)
228 reg
->re_nsub
= (nmatch
<= 1 ? 0 : nmatch
- 1);
235 regfree(regex_t
* reg
)
237 onig_free(ONIG_C(reg
));
242 reg_set_encoding(int mb_code
)
247 case REG_POSIX_ENCODING_ASCII
:
248 enc
= ONIG_ENCODING_ASCII
;
250 case REG_POSIX_ENCODING_UTF16_LE
:
251 enc
= ONIG_ENCODING_UTF16_LE
;
255 onig_initialize(&enc
, 1);
257 onigenc_set_default_encoding(enc
);
261 reg_name_to_group_numbers(regex_t
* reg
,
262 const unsigned char* name
, const unsigned char* name_end
, int** nums
)
264 return onig_name_to_group_numbers(ONIG_C(reg
), name
, name_end
, nums
);
268 int (*func
)(const unsigned char*, const unsigned char*,int,int*,regex_t
*,void*);
274 i_wrapper(const UChar
* name
, const UChar
* name_end
, int ng
, int* gs
,
275 onig_regex_t
* reg ARG_UNUSED
, void* arg
)
277 i_wrap
* warg
= (i_wrap
* )arg
;
279 return (*warg
->func
)(name
, name_end
, ng
, gs
, warg
->reg
, warg
->arg
);
283 reg_foreach_name(regex_t
* reg
,
284 int (*func
)(const unsigned char*, const unsigned char*,int,int*,regex_t
*,void*),
293 return onig_foreach_name(ONIG_C(reg
), i_wrapper
, &warg
);
297 reg_number_of_names(regex_t
* reg
)
299 return onig_number_of_names(ONIG_C(reg
));