1 /**********************************************************************
2 regposix.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/*
 * Local alias and accessors used by the POSIX wrapper functions below.
 * The name regex_t is mapped onto onig_regex_t before onigposix.h is included.
 */
32 #define regex_t onig_regex_t
35 #include "onigposix.h"
/* ONIG_C(reg): the value of reg->onig viewed as an onig_regex_t pointer. */
37 #define ONIG_C(reg) ((onig_regex_t* )((reg)->onig))
/* PONIG_C(reg): the address of reg->onig viewed as an onig_regex_t**;
 * regcomp() passes it to onig_new() as the output slot. */
38 #define PONIG_C(reg) ((onig_regex_t** )(&(reg)->onig))
/*
 * ENC_STRING_LEN(enc, s, len): store into len the length in bytes of the
 * zero-terminated string s.
 * When ONIGENC_MBC_MINLEN(enc) is 1 the string is scanned byte by byte up to
 * the first zero byte; the other visible path delegates to
 * onigenc_str_bytelen_null().
 * The commented-out definition directly below is the plain strlen() form.
 */
40 /* #define ENC_STRING_LEN(enc,s,len) len = strlen(s) */
41 #define ENC_STRING_LEN(enc,s,len) do { \
42 if (ONIGENC_MBC_MINLEN(enc) == 1) { \
43 UChar* tmps = (UChar* )(s); \
44 while (*tmps != 0) tmps++; \
45 len = (int)(tmps - (UChar* )(s)); \
48 len = onigenc_str_bytelen_null(enc, (UChar* )s); \
/*
 * Translate an Oniguruma result code into a POSIX regex error code.
 *
 * code: value returned by an Oniguruma call.
 *
 * Returns 0 for any non-negative code, the posix_err paired with code in the
 * o2p table when an entry exists, and REG_EONIG_INTERNAL when no entry matches.
 */
58 onig2posix_error_code(int code
)
/* Lookup table pairing each Oniguruma code (onig_err) with its POSIX code (posix_err). */
60 static const O2PERR o2p
[] = {
61 { ONIG_MISMATCH
, REG_NOMATCH
},
62 { ONIG_NO_SUPPORT_CONFIG
, REG_EONIG_INTERNAL
},
63 { ONIGERR_MEMORY
, REG_ESPACE
},
64 { ONIGERR_MATCH_STACK_LIMIT_OVER
, REG_EONIG_INTERNAL
},
65 { ONIGERR_TYPE_BUG
, REG_EONIG_INTERNAL
},
66 { ONIGERR_PARSER_BUG
, REG_EONIG_INTERNAL
},
67 { ONIGERR_STACK_BUG
, REG_EONIG_INTERNAL
},
68 { ONIGERR_UNDEFINED_BYTECODE
, REG_EONIG_INTERNAL
},
69 { ONIGERR_UNEXPECTED_BYTECODE
, REG_EONIG_INTERNAL
},
70 { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED
, REG_EONIG_BADARG
},
71 { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR
, REG_EONIG_BADARG
},
72 { ONIGERR_INVALID_ARGUMENT
, REG_EONIG_BADARG
},
73 { ONIGERR_END_PATTERN_AT_LEFT_BRACE
, REG_EBRACE
},
74 { ONIGERR_END_PATTERN_AT_LEFT_BRACKET
, REG_EBRACK
},
75 { ONIGERR_EMPTY_CHAR_CLASS
, REG_ECTYPE
},
76 { ONIGERR_PREMATURE_END_OF_CHAR_CLASS
, REG_ECTYPE
},
77 { ONIGERR_END_PATTERN_AT_ESCAPE
, REG_EESCAPE
},
78 { ONIGERR_END_PATTERN_AT_META
, REG_EESCAPE
},
79 { ONIGERR_END_PATTERN_AT_CONTROL
, REG_EESCAPE
},
80 { ONIGERR_META_CODE_SYNTAX
, REG_BADPAT
},
81 { ONIGERR_CONTROL_CODE_SYNTAX
, REG_BADPAT
},
82 { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE
, REG_ECTYPE
},
83 { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE
, REG_ECTYPE
},
84 { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS
, REG_ECTYPE
},
85 { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED
, REG_BADRPT
},
86 { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID
, REG_BADRPT
},
87 { ONIGERR_NESTED_REPEAT_OPERATOR
, REG_BADRPT
},
88 { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS
, REG_EPAREN
},
89 { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS
, REG_EPAREN
},
90 { ONIGERR_END_PATTERN_IN_GROUP
, REG_BADPAT
},
91 { ONIGERR_UNDEFINED_GROUP_OPTION
, REG_BADPAT
},
92 { ONIGERR_INVALID_POSIX_BRACKET_TYPE
, REG_BADPAT
},
93 { ONIGERR_INVALID_LOOK_BEHIND_PATTERN
, REG_BADPAT
},
94 { ONIGERR_INVALID_REPEAT_RANGE_PATTERN
, REG_BADPAT
},
95 { ONIGERR_TOO_BIG_NUMBER
, REG_BADPAT
},
96 { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE
, REG_BADBR
},
97 { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE
, REG_BADBR
},
98 { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS
, REG_ECTYPE
},
99 { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE
, REG_ECTYPE
},
100 { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES
, REG_ECTYPE
},
101 { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING
, REG_BADPAT
},
102 { ONIGERR_TOO_BIG_BACKREF_NUMBER
, REG_ESUBREG
},
103 { ONIGERR_INVALID_BACKREF
, REG_ESUBREG
},
104 { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
, REG_BADPAT
},
105 { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
, REG_EONIG_BADWC
},
106 { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE
, REG_EONIG_BADWC
},
107 { ONIGERR_INVALID_CODE_POINT_VALUE
, REG_EONIG_BADWC
},
108 { ONIGERR_EMPTY_GROUP_NAME
, REG_BADPAT
},
109 { ONIGERR_INVALID_GROUP_NAME
, REG_BADPAT
},
110 { ONIGERR_INVALID_CHAR_IN_GROUP_NAME
, REG_BADPAT
},
111 { ONIGERR_UNDEFINED_NAME_REFERENCE
, REG_BADPAT
},
112 { ONIGERR_UNDEFINED_GROUP_REFERENCE
, REG_BADPAT
},
113 { ONIGERR_MULTIPLEX_DEFINED_NAME
, REG_BADPAT
},
114 { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
, REG_BADPAT
},
115 { ONIGERR_NEVER_ENDING_RECURSION
, REG_BADPAT
},
116 { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY
, REG_BADPAT
},
117 { ONIGERR_INVALID_CHAR_PROPERTY_NAME
, REG_BADPAT
},
118 { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION
, REG_EONIG_BADARG
},
119 { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT
, REG_EONIG_THREAD
}
/* Non-negative codes are not errors: report success. */
125 if (code
>= 0) return 0;
/* Linear scan of the table; the element count is derived from the array size. */
127 for (i
= 0; i
< (int )(sizeof(o2p
) / sizeof(o2p
[0])); i
++) {
128 if (code
== o2p
[i
].onig_err
)
129 return o2p
[i
].posix_err
;
132 return REG_EONIG_INTERNAL
; /* no table entry matched: unknown error code */
/*
 * Compile a pattern through Oniguruma behind the POSIX regcomp() interface.
 *
 * reg:           receives the compiled regex (in reg->onig), the flags
 *                (reg->comp_options) and the group count (reg->re_nsub).
 * pattern:       zero-terminated pattern; its byte length is computed with
 *                ENC_STRING_LEN under OnigEncDefaultCharEncoding.
 * posix_options: REG_EXTENDED, REG_ICASE and REG_NEWLINE are examined here.
 *
 * When onig_new() does not return ONIG_NORMAL, the result of
 * onig2posix_error_code() is returned.
 * NOTE(review): the success return statement is not visible in this listing.
 */
136 regcomp(regex_t
* reg
, const char* pattern
, int posix_options
)
139 OnigSyntaxType
* syntax
= OnigDefaultSyntax
;
140 OnigOptionType options
;
/* Without REG_EXTENDED the POSIX basic syntax replaces the default syntax. */
142 if ((posix_options
& REG_EXTENDED
) == 0)
143 syntax
= ONIG_SYNTAX_POSIX_BASIC
;
/* Start from the options of the chosen syntax, then apply the POSIX flags. */
145 options
= syntax
->options
;
146 if ((posix_options
& REG_ICASE
) != 0)
147 ONIG_OPTION_ON(options
, ONIG_OPTION_IGNORECASE
);
/* REG_NEWLINE: set NEGATE_SINGLELINE and clear SINGLELINE. */
148 if ((posix_options
& REG_NEWLINE
) != 0) {
149 ONIG_OPTION_ON( options
, ONIG_OPTION_NEGATE_SINGLELINE
);
150 ONIG_OPTION_OFF(options
, ONIG_OPTION_SINGLELINE
);
/* Keep the caller's flags; regexec() later tests REG_NOSUB on them. */
153 reg
->comp_options
= posix_options
;
155 ENC_STRING_LEN(OnigEncDefaultCharEncoding
, pattern
, len
);
/* No OnigErrorInfo is requested (NULL is passed), so only the code comes back. */
156 r
= onig_new(PONIG_C(reg
), (UChar
* )pattern
, (UChar
* )(pattern
+ len
),
157 options
, OnigEncDefaultCharEncoding
, syntax
,
158 (OnigErrorInfo
* )NULL
);
159 if (r
!= ONIG_NORMAL
) {
160 return onig2posix_error_code(r
);
/* Publish the compiled regex's num_mem as the POSIX sub-expression count. */
163 reg
->re_nsub
= ONIG_C(reg
)->num_mem
;
/*
 * Search str with a compiled regex behind the POSIX regexec() interface.
 *
 * reg:           regex previously filled in by regcomp().
 * str:           zero-terminated subject; its byte length is computed with
 *                ENC_STRING_LEN under the regex's own encoding.
 * nmatch/pmatch: caller's match array and its element count.
 * posix_options: REG_NOTBOL and REG_NOTEOL are translated to search options.
 *
 * NOTE(review): several lines of this function (local declarations, some
 * branch bodies, the final return) are not visible in this listing; the
 * comments below describe only the statements that are shown.
 */
168 regexec(regex_t
* reg
, const char* str
, size_t nmatch
,
169 regmatch_t pmatch
[], int posix_options
)
174 OnigOptionType options
;
/* Base option is ONIG_OPTION_POSIX_REGION; the region argument passed to
 * onig_search() below is a regmatch_t array cast to OnigRegion*. */
176 options
= ONIG_OPTION_POSIX_REGION
;
177 if ((posix_options
& REG_NOTBOL
) != 0) options
|= ONIG_OPTION_NOTBOL
;
178 if ((posix_options
& REG_NOTEOL
) != 0) options
|= ONIG_OPTION_NOTEOL
;
/* No positions wanted (nmatch is 0 or the regex was compiled with REG_NOSUB):
 * search without a region. */
180 if (nmatch
== 0 || (reg
->comp_options
& REG_NOSUB
) != 0) {
181 pm
= (regmatch_t
* )NULL
;
/* Caller's array has fewer than num_mem + 1 entries: allocate a temporary
 * array of num_mem + 1 entries for the search. */
184 else if ((int )nmatch
< ONIG_C(reg
)->num_mem
+ 1) {
185 pm
= (regmatch_t
* )xmalloc(sizeof(regmatch_t
)
186 * (ONIG_C(reg
)->num_mem
+ 1));
194 ENC_STRING_LEN(ONIG_C(reg
)->enc
, str
, len
);
195 end
= (UChar
* )(str
+ len
);
/* Search the whole string: the search range runs from str to end. */
196 r
= onig_search(ONIG_C(reg
), (UChar
* )str
, end
, (UChar
* )str
, end
,
197 (OnigRegion
* )pm
, options
);
/* pm is a separate non-NULL buffer: copy the first nmatch entries back to
 * the caller's array. */
201 if (pm
!= pmatch
&& pm
!= NULL
) {
202 xmemcpy(pmatch
, pm
, sizeof(regmatch_t
) * nmatch
);
/* No match: mark every caller entry as unset. */
205 else if (r
== ONIG_MISMATCH
) {
207 for (i
= 0; i
< (int )nmatch
; i
++)
208 pmatch
[i
].rm_so
= pmatch
[i
].rm_eo
= ONIG_REGION_NOTPOS
;
/* Otherwise convert the Oniguruma result into a POSIX error code. */
211 r
= onig2posix_error_code(r
);
/* Same "separate non-NULL buffer" test as above.
 * NOTE(review): the guarded statement is not visible here; presumably it
 * releases the temporary array -- confirm. */
214 if (pm
!= pmatch
&& pm
!= NULL
)
/* Lower re_nsub to nmatch - 1 (0 when nmatch <= 1) if it is larger.
 * NOTE(review): nmatch is size_t, so nmatch - 1 wraps when nmatch is 0;
 * also confirm whether this statement is actually compiled (any surrounding
 * preprocessor lines are not visible here). */
218 if (reg
->re_nsub
> nmatch
- 1)
219 reg
->re_nsub
= (nmatch
<= 1 ? 0 : nmatch
- 1);
/*
 * Release the compiled regex held in reg->onig by passing it to onig_free().
 */
226 regfree(regex_t
* reg
)
228 onig_free(ONIG_C(reg
));
/*
 * Select the default character encoding from a REG_POSIX_ENCODING_* value
 * and install it with onigenc_set_default_encoding().
 *
 * mb_code: one of the REG_POSIX_ENCODING_* constants.
 *
 * NOTE(review): only the ASCII and UTF16_LE cases are visible in this
 * listing; any other cases and the default handling are not shown.
 */
233 reg_set_encoding(int mb_code
)
238 case REG_POSIX_ENCODING_ASCII
:
239 enc
= ONIG_ENCODING_ASCII
;
241 case REG_POSIX_ENCODING_UTF16_LE
:
242 enc
= ONIG_ENCODING_UTF16_LE
;
/* Make the selected encoding the library-wide default. */
249 onigenc_set_default_encoding(enc
);
/*
 * Forward to onig_name_to_group_numbers() on the compiled regex held in
 * reg->onig and return its result unchanged.
 *
 * name, name_end: bounds of the group name, passed through as given.
 * nums:           output pointer, passed through as given.
 */
253 reg_name_to_group_numbers(regex_t
* reg
,
254 const unsigned char* name
, const unsigned char* name_end
, int** nums
)
256 return onig_name_to_group_numbers(ONIG_C(reg
), name
, name_end
, nums
);
/* Callback with the POSIX-side signature (takes regex_t*); i_wrapper()
 * invokes it through warg->func. */
260 int (*func
)(const unsigned char*, const unsigned char*,int,int*,regex_t
*,void*);
/*
 * Adapter handed to onig_foreach_name() by reg_foreach_name().
 *
 * arg is treated as an i_wrap*; the stored callback (warg->func) is called
 * with the name bounds, ng and gs passed through, plus the regex and user
 * argument stored in the wrapper (warg->reg, warg->arg). The reg parameter
 * received here is marked ARG_UNUSED and is not forwarded.
 * Returns the callback's result.
 */
266 i_wrapper(const UChar
* name
, const UChar
* name_end
, int ng
, int* gs
,
267 onig_regex_t
* reg ARG_UNUSED
, void* arg
)
269 i_wrap
* warg
= (i_wrap
* )arg
;
271 return (*warg
->func
)(name
, name_end
, ng
, gs
, warg
->reg
, warg
->arg
);
/*
 * Iterate over names via onig_foreach_name() on the compiled regex held in
 * reg->onig, using i_wrapper() as the adapter and &warg as its argument.
 * Returns the result of onig_foreach_name().
 *
 * NOTE(review): the remaining parameter(s) and the code that fills in warg
 * are not visible in this listing.
 */
275 reg_foreach_name(regex_t
* reg
,
276 int (*func
)(const unsigned char*, const unsigned char*,int,int*,regex_t
*,void*),
285 return onig_foreach_name(ONIG_C(reg
), i_wrapper
, &warg
);
/*
 * Return the result of onig_number_of_names() for the compiled regex held in
 * reg->onig.
 */
289 reg_number_of_names(regex_t
* reg
)
291 return onig_number_of_names(ONIG_C(reg
));